]> granicus.if.org Git - libjpeg-turbo/commitdiff
IJG R6b with x86SIMD V1.02 jpeg-6bx
author: MIYASAKA Masaru <alkaid@coral.ocn.ne.jp>
Sat, 4 Feb 2006 00:00:00 +0000 (00:00 +0000)
committer: DRC <information@libjpeg-turbo.org>
Wed, 29 Jul 2015 21:36:25 +0000 (16:36 -0500)
Independent JPEG Group's JPEG software release 6b
with x86 SIMD extension for IJG JPEG library version 1.02

156 files changed:
aclocal.m4 [new file with mode: 0644]
altui/README.alt [new file with mode: 0644]
altui/cjpeg.c [new file with mode: 0644]
altui/djpeg.c [new file with mode: 0644]
altui/usage.alt [new file with mode: 0644]
cjpeg.c
ckconfig.c
config.guess [changed mode: 0755->0644]
config.sub [changed mode: 0755->0644]
config.ver [new file with mode: 0644]
configure
configure.in [new file with mode: 0644]
djpeg.c
install-sh
jccolmmx.asm [new file with mode: 0644]
jccolor.c
jccolss2.asm [new file with mode: 0644]
jcdctmgr.c
jcolsamp.h [new file with mode: 0644]
jcolsamp.inc [new file with mode: 0644]
jcomapi.c
jconfig.bc5 [new file with mode: 0644]
jconfig.cfg
jconfig.dj
jconfig.linux [new file with mode: 0644]
jconfig.mgw [new file with mode: 0644]
jconfig.vc
jcqnt3dn.asm [new file with mode: 0644]
jcqntflt.asm [new file with mode: 0644]
jcqntint.asm [new file with mode: 0644]
jcqntmmx.asm [new file with mode: 0644]
jcqnts2f.asm [new file with mode: 0644]
jcqnts2i.asm [new file with mode: 0644]
jcqntsse.asm [new file with mode: 0644]
jcsammmx.asm [new file with mode: 0644]
jcsample.c
jcsamss2.asm [new file with mode: 0644]
jdcoefct.c
jdcolmmx.asm [new file with mode: 0644]
jdcolor.c
jdcolss2.asm [new file with mode: 0644]
jdct.h
jdct.inc [new file with mode: 0644]
jddctmgr.c
jdhuff.c
jdhuff.h
jdmerge.c
jdmermmx.asm [new file with mode: 0644]
jdmerss2.asm [new file with mode: 0644]
jdphuff.c
jdsammmx.asm [new file with mode: 0644]
jdsample.c
jdsamss2.asm [new file with mode: 0644]
jf3dnflt.asm [new file with mode: 0644]
jfdctflt.asm [new file with mode: 0644]
jfdctfst.asm [new file with mode: 0644]
jfdctint.asm [new file with mode: 0644]
jfmmxfst.asm [new file with mode: 0644]
jfmmxint.asm [new file with mode: 0644]
jfss2fst.asm [new file with mode: 0644]
jfss2int.asm [new file with mode: 0644]
jfsseflt.asm [new file with mode: 0644]
ji3dnflt.asm [new file with mode: 0644]
jidctflt.asm [new file with mode: 0644]
jidctfst.asm [new file with mode: 0644]
jidctint.asm [new file with mode: 0644]
jidctred.asm [new file with mode: 0644]
jimmxfst.asm [new file with mode: 0644]
jimmxint.asm [new file with mode: 0644]
jimmxred.asm [new file with mode: 0644]
jiss2flt.asm [new file with mode: 0644]
jiss2fst.asm [new file with mode: 0644]
jiss2int.asm [new file with mode: 0644]
jiss2red.asm [new file with mode: 0644]
jisseflt.asm [new file with mode: 0644]
jmemmgr.c
jmorecfg.h
jpegdll.def [new file with mode: 0644]
jpegdll.rc [new file with mode: 0644]
jpegint.h
jpeglib.h
jsimdcpu.asm [new file with mode: 0644]
jsimddjg.asm [new file with mode: 0644]
jsimdext.inc [new file with mode: 0644]
jsimdgcc.c [new file with mode: 0644]
jsimdw32.asm [new file with mode: 0644]
libjpeg.spec [new file with mode: 0644]
ltconfig [deleted file]
ltmain.sh
makecfg.c [new file with mode: 0644]
makefile.ansi
makefile.bc5 [new file with mode: 0644]
makefile.cfg
makefile.dj
makefile.linux [new file with mode: 0644]
makefile.mgw [new file with mode: 0644]
makefile.mgwdll [new file with mode: 0644]
makefile.unix
makefile.vc
makefile.vcdll [new file with mode: 0644]
nasm_lt.sh [new file with mode: 0644]
rdbmp.c
rdgif.c
simd_README.ja.txt [new file with mode: 0644]
simd_cdjpeg.ja.txt [new file with mode: 0644]
simd_changes.ja.txt [new file with mode: 0644]
simd_filelist.ja.txt [new file with mode: 0644]
simd_install.ja.txt [new file with mode: 0644]
simd_internal.ja.txt [new file with mode: 0644]
unused/jconfig.bcc [moved from jconfig.bcc with 100% similarity]
unused/jconfig.mac [moved from jconfig.mac with 100% similarity]
unused/jconfig.manx [moved from jconfig.manx with 100% similarity]
unused/jconfig.mc6 [moved from jconfig.mc6 with 100% similarity]
unused/jconfig.sas [moved from jconfig.sas with 100% similarity]
unused/jconfig.st [moved from jconfig.st with 100% similarity]
unused/jconfig.vms [moved from jconfig.vms with 100% similarity]
unused/jconfig.wat [moved from jconfig.wat with 100% similarity]
unused/jfdctflt.c [moved from jfdctflt.c with 100% similarity]
unused/jfdctfst.c [moved from jfdctfst.c with 100% similarity]
unused/jfdctint.c [moved from jfdctint.c with 100% similarity]
unused/jidctflt.c [moved from jidctflt.c with 100% similarity]
unused/jidctfst.c [moved from jidctfst.c with 100% similarity]
unused/jidctint.c [moved from jidctint.c with 100% similarity]
unused/jidctred.c [moved from jidctred.c with 100% similarity]
unused/jmemdos.c [moved from jmemdos.c with 100% similarity]
unused/jmemdosa.asm [moved from jmemdosa.asm with 100% similarity]
unused/jmemmac.c [moved from jmemmac.c with 100% similarity]
unused/makcjpeg.st [moved from makcjpeg.st with 100% similarity]
unused/makdjpeg.st [moved from makdjpeg.st with 100% similarity]
unused/makeapps.ds [moved from makeapps.ds with 100% similarity]
unused/makefile.bcc [moved from makefile.bcc with 100% similarity]
unused/makefile.manx [moved from makefile.manx with 100% similarity]
unused/makefile.mc6 [moved from makefile.mc6 with 100% similarity]
unused/makefile.mms [moved from makefile.mms with 100% similarity]
unused/makefile.sas [moved from makefile.sas with 100% similarity]
unused/makefile.vms [moved from makefile.vms with 100% similarity]
unused/makefile.wat [moved from makefile.wat with 100% similarity]
unused/makelib.ds [moved from makelib.ds with 100% similarity]
unused/makeproj.mac [moved from makeproj.mac with 100% similarity]
unused/makljpeg.st [moved from makljpeg.st with 100% similarity]
unused/maktjpeg.st [moved from maktjpeg.st with 100% similarity]
unused/makvms.opt [moved from makvms.opt with 100% similarity]
unused/rdgif.c [new file with mode: 0644]
unused/wrgif.c [new file with mode: 0644]
vc6proj/apptest.dsp [new file with mode: 0644]
vc6proj/cjpeg.dsp [new file with mode: 0644]
vc6proj/djpeg.dsp [new file with mode: 0644]
vc6proj/jconfig.h [new file with mode: 0644]
vc6proj/jpegtran.dsp [new file with mode: 0644]
vc6proj/libjpeg.dsp [new file with mode: 0644]
vc6proj/libjpeg.dsw [new file with mode: 0644]
vc6proj/makecfg.dsp [new file with mode: 0644]
vc6proj/rdjpgcom.dsp [new file with mode: 0644]
vc6proj/wrjpgcom.dsp [new file with mode: 0644]
wrbmp.c
wrgif.c

diff --git a/aclocal.m4 b/aclocal.m4
new file mode 100644 (file)
index 0000000..54e986b
--- /dev/null
@@ -0,0 +1,3655 @@
+# generated automatically by aclocal 1.8.5 -*- Autoconf -*-
+
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
+# Free Software Foundation, Inc.
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+# libtool.m4 - Configure libtool for the host system. -*-Shell-script-*-
+
+# serial 46 AC_PROG_LIBTOOL
+
+# AC_PROG_LIBTOOL
+# ---------------
+# Pull in AC_LIBTOOL_SETUP, record LIBTOOL_DEPS, set and substitute LIBTOOL,
+# and finally redefine this macro to nothing so it expands only once.
+AC_DEFUN([AC_PROG_LIBTOOL],
+[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh"
+
+# Always use our own libtool.
+# The value is single-quoted, so the shell leaves the SHELL and
+# top_builddir references unexpanded in the substituted text.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+AC_SUBST(LIBTOOL)dnl
+
+# Prevent multiple expansion
+define([AC_PROG_LIBTOOL], [])
+])
+
+# AC_LIBTOOL_SETUP
+# ----------------
+# Require the checks listed at the top of the body, locate ranlib and strip,
+# record the dlopen, win32-dll and libtool-lock settings, apply host-specific
+# adjustments (IRIX 6 ABI flag for LD, SCO -belf, and the DLL tools on
+# cygwin/mingw/pw32 when AC_PROVIDE_AC_LIBTOOL_WIN32_DLL is defined), and
+# then expand _LT_AC_LTCONFIG_HACK.
+AC_DEFUN([AC_LIBTOOL_SETUP],
+[AC_PREREQ(2.13)dnl
+AC_REQUIRE([AC_ENABLE_SHARED])dnl
+AC_REQUIRE([AC_ENABLE_STATIC])dnl
+AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_PROG_LD])dnl
+AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl
+AC_REQUIRE([AC_PROG_NM])dnl
+AC_REQUIRE([LT_AC_PROG_SED])dnl
+
+AC_REQUIRE([AC_PROG_LN_S])dnl
+AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl
+AC_REQUIRE([AC_OBJEXT])dnl
+AC_REQUIRE([AC_EXEEXT])dnl
+dnl
+
+_LT_AC_PROG_ECHO_BACKSLASH
+# Only perform the check for file, if the check method requires it
+case $deplibs_check_method in
+file_magic*)
+  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
+    AC_PATH_MAGIC
+  fi
+  ;;
+esac
+
+AC_CHECK_TOOL(RANLIB, ranlib, :)
+AC_CHECK_TOOL(STRIP, strip, :)
+
+# enable_dlopen and enable_win32_dll are yes only when the matching
+# AC_PROVIDE_ symbol is defined.
+ifdef([AC_PROVIDE_AC_LIBTOOL_DLOPEN], enable_dlopen=yes, enable_dlopen=no)
+ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL],
+enable_win32_dll=yes, enable_win32_dll=no)
+
+AC_ARG_ENABLE(libtool-lock,
+  [  --disable-libtool-lock  avoid locking (might break parallel builds)])
+# Any value other than an explicit no turns libtool locking on.
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+*-*-irix6*)
+  # Find out which ABI we are using.
+  echo '[#]line __oline__ "configure"' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    case `/usr/bin/file conftest.$ac_objext` in
+    *32-bit*)
+      LD="${LD-ld} -32"
+      ;;
+    *N32*)
+      LD="${LD-ld} -n32"
+      ;;
+    *64-bit*)
+      LD="${LD-ld} -64"
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -belf"
+  AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
+    [AC_LANG_SAVE
+     AC_LANG_C
+     AC_TRY_LINK([],[],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
+     AC_LANG_RESTORE])
+  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS="$SAVE_CFLAGS"
+  fi
+  ;;
+
+ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL],
+[*-*-cygwin* | *-*-mingw* | *-*-pw32*)
+  AC_CHECK_TOOL(DLLTOOL, dlltool, false)
+  AC_CHECK_TOOL(AS, as, false)
+  AC_CHECK_TOOL(OBJDUMP, objdump, false)
+
+  # recent cygwin and mingw systems supply a stub DllMain which the user
+  # can override, but on older systems we have to supply one
+  AC_CACHE_CHECK([if libtool should supply DllMain function], lt_cv_need_dllmain,
+    [AC_TRY_LINK([],
+      [extern int __attribute__((__stdcall__)) DllMain(void*, int, void*);
+      DllMain (0, 0, 0);],
+      [lt_cv_need_dllmain=no],[lt_cv_need_dllmain=yes])])
+
+  case $host/$CC in
+  *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*)
+    # old mingw systems require "-dll" to link a DLL, while more recent ones
+    # require "-mdll"
+    SAVE_CFLAGS="$CFLAGS"
+    CFLAGS="$CFLAGS -mdll"
+    AC_CACHE_CHECK([how to link DLLs], lt_cv_cc_dll_switch,
+      [AC_TRY_LINK([], [], [lt_cv_cc_dll_switch=-mdll],[lt_cv_cc_dll_switch=-dll])])
+    CFLAGS="$SAVE_CFLAGS" ;;
+  *-*-cygwin* | *-*-pw32*)
+    # cygwin systems need to pass --dll to the linker, and not link
+    # crt.o which will require a WinMain@16 definition.
+    lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;;
+  esac
+  ;;
+  ])
+esac
+
+_LT_AC_LTCONFIG_HACK
+
+])
+
+# AC_LIBTOOL_HEADER_ASSERT
+# ------------------------
+# Decide whether assert can be used without backlinking and, if so, check
+# for assert.h.  The answer is "yes" unless the host is Solaris, GCC is in
+# use without GNU ld, and `$CC --version' starts with "1." or "2.".
+AC_DEFUN([AC_LIBTOOL_HEADER_ASSERT],
+[AC_CACHE_CHECK([whether $CC supports assert without backlinking],
+    [lt_cv_func_assert_works],
+    [lt_cv_func_assert_works=yes
+    # The default above keeps the cache variable from staying empty on hosts
+    # that do not reach the Solaris branch below.
+    case $host in
+    *-*-solaris*)
+      if test "$GCC" = yes && test "$with_gnu_ld" != yes; then
+        case `$CC --version 2>/dev/null` in
+        [[12]].*) lt_cv_func_assert_works=no ;;
+        *)        lt_cv_func_assert_works=yes ;;
+        esac
+      fi
+      ;;
+    esac])
+
+if test "x$lt_cv_func_assert_works" = xyes; then
+  AC_CHECK_HEADERS(assert.h)
+fi
+])# AC_LIBTOOL_HEADER_ASSERT
+
+# _LT_AC_CHECK_DLFCN
+# ------------------
+# Check for the dlfcn.h header via AC_CHECK_HEADERS.
+AC_DEFUN([_LT_AC_CHECK_DLFCN],
+[AC_CHECK_HEADERS(dlfcn.h)
+])# _LT_AC_CHECK_DLFCN
+
+# AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE
+# ---------------------------------
+# Work out a sed pipeline (lt_cv_sys_global_symbol_pipe) that extracts global
+# symbols from $NM output, verify it by compiling a small test program and
+# linking it against a generated symbol table, and publish the result as
+# global_symbol_pipe, global_symbol_to_cdecl and
+# global_symbol_to_c_name_address.  Reports "ok" or "failed".
+AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE],
+[AC_REQUIRE([AC_CANONICAL_HOST])
+AC_REQUIRE([AC_PROG_NM])
+AC_REQUIRE([AC_OBJEXT])
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+AC_MSG_CHECKING([command to parse $NM output])
+AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [dnl
+
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[[BCDEGRST]]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
+
+# Transform the above into a raw symbol and a C symbol.
+symxfrm='\1 \2\3 \3'
+
+# Transform an extracted symbol line into a proper C declaration
+lt_cv_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/  {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([[^ ]]*\) \([[^ ]]*\)$/  {\"\2\", (lt_ptr) \&\2},/p'"
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+  symcode='[[BCDT]]'
+  ;;
+cygwin* | mingw* | pw32*)
+  symcode='[[ABCDGISTW]]'
+  ;;
+hpux*) # Its linker distinguishes data from code symbols
+  lt_cv_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+  lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/  {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/  {\"\2\", (lt_ptr) \&\2},/p'"
+  ;;
+irix* | nonstopux*)
+  symcode='[[BCDEGRST]]'
+  ;;
+osf*)
+  symcode='[[BCDEGQRST]]'
+  ;;
+solaris* | sysv5*)
+  symcode='[[BDT]]'
+  ;;
+sysv4)
+  symcode='[[DFNSTU]]'
+  ;;
+esac
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $host_os in
+mingw*)
+  opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then
+  symcode='[[ABCDGISTW]]'
+fi
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Write the raw and C identifiers.
+lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[       ]]\($symcode$symcode*\)[[       ]][[    ]]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"
+
+  # Check to see that the pipe works correctly.
+  pipe_works=no
+  rm -f conftest*
+  cat > conftest.$ac_ext <<EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+EOF
+
+  if AC_TRY_EVAL(ac_compile); then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if AC_TRY_EVAL(NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      if sort "$nlist" | uniq > "$nlist"T; then
+       mv -f "$nlist"T "$nlist"
+      else
+       rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if egrep ' nm_test_var$' "$nlist" >/dev/null; then
+       if egrep ' nm_test_func$' "$nlist" >/dev/null; then
+         cat <<EOF > conftest.$ac_ext
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+         # Now generate the symbol file.
+         eval "$lt_cv_global_symbol_to_cdecl"' < "$nlist" >> conftest.$ac_ext'
+
+         cat <<EOF >> conftest.$ac_ext
+#if defined (__STDC__) && __STDC__
+# define lt_ptr void *
+#else
+# define lt_ptr char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+  const char *name;
+  lt_ptr address;
+}
+lt_preloaded_symbols[[]] =
+{
+EOF
+         sed "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (lt_ptr) \&\2},/" < "$nlist" >> conftest.$ac_ext
+         cat <<\EOF >> conftest.$ac_ext
+  {0, (lt_ptr) 0}
+};
+
+#ifdef __cplusplus
+}
+#endif
+EOF
+         # Now try linking the two files.
+         mv conftest.$ac_objext conftstm.$ac_objext
+         save_LIBS="$LIBS"
+         save_CFLAGS="$CFLAGS"
+         LIBS="conftstm.$ac_objext"
+         CFLAGS="$CFLAGS$no_builtin_flag"
+         if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then
+           pipe_works=yes
+         fi
+         LIBS="$save_LIBS"
+         CFLAGS="$save_CFLAGS"
+       else
+         echo "cannot find nm_test_func in $nlist" >&AC_FD_CC
+       fi
+      else
+       echo "cannot find nm_test_var in $nlist" >&AC_FD_CC
+      fi
+    else
+      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AC_FD_CC
+    fi
+  else
+    echo "$progname: failed program was:" >&AC_FD_CC
+    cat conftest.$ac_ext >&AC_FD_CC
+  fi
+  rm -f conftest* conftst*
+
+  # Do not use the global_symbol_pipe unless it works.
+  if test "$pipe_works" = yes; then
+    break
+  else
+    lt_cv_sys_global_symbol_pipe=
+  fi
+done
+])
+global_symbol_pipe="$lt_cv_sys_global_symbol_pipe"
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+  global_symbol_to_cdecl=
+  global_symbol_to_c_name_address=
+else
+  global_symbol_to_cdecl="$lt_cv_global_symbol_to_cdecl"
+  global_symbol_to_c_name_address="$lt_cv_global_symbol_to_c_name_address"
+fi
+# Report failure only when all three results are empty.
+if test -z "$global_symbol_pipe$global_symbol_to_cdecl$global_symbol_to_c_name_address";
+then
+  AC_MSG_RESULT(failed)
+else
+  AC_MSG_RESULT(ok)
+fi
+]) # AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE
+
+# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR
+# ---------------------------------
+AC_DEFUN([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR],
+[# Choose the PATH separator: normally `:', but `;' when uname reports a
+# name ending in -DOS (DJGPP).  A PATH_SEPARATOR that is already set is kept.
+if test "X${PATH_SEPARATOR+set}" = Xset; then
+  :
+else
+  UNAME=${UNAME-`uname 2>/dev/null`}
+  lt_cv_sys_path_separator=':'
+  case X$UNAME in
+    *-DOS) lt_cv_sys_path_separator=';' ;;
+  esac
+  PATH_SEPARATOR=$lt_cv_sys_path_separator
+fi
+])# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR
+
+# _LT_AC_PROG_ECHO_BACKSLASH
+# --------------------------
+# Add some code to the start of the generated configure script which
+# will find an echo command which doesn't interpret backslashes.
+# Candidates are tried in this order: the current echo, an echo program
+# found in PATH or /usr/ucb, `print -r', re-running configure under
+# /bin/ksh, `printf %s\n', and finally the configure script itself invoked
+# with --fallback-echo.  The winner is substituted as ECHO.
+AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH],
+[ifdef([AC_DIVERSION_NOTICE], [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)],
+                             [AC_DIVERT_PUSH(NOTICE)])
+_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR
+
+# Check that we are running under the correct shell.
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+case X$ECHO in
+X*--fallback-echo)
+  # Remove one level of quotation (which was required for Make).
+  ECHO=`echo "$ECHO" | sed 's,\\\\\[$]\\[$]0,'[$]0','`
+  ;;
+esac
+
+echo=${ECHO-echo}
+# Dispatch on the first argument: --no-reexec marks a run that was already
+# re-executed and --fallback-echo asks this script to act as echo.
+if test "X[$]1" = X--no-reexec; then
+  # Discard the --no-reexec flag, and continue.
+  shift
+elif test "X[$]1" = X--fallback-echo; then
+  # Avoid inline document here, it may be left over
+  :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+  # Yippee, $echo works!
+  :
+else
+  # Restart under the correct shell.
+  exec $SHELL "[$]0" --no-reexec ${1+"[$]@"}
+fi
+
+if test "X[$]1" = X--fallback-echo; then
+  # used as fallback echo
+  shift
+  cat <<EOF
+$*
+EOF
+  exit 0
+fi
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+# Search for a working echo only when ECHO is empty.
+if test -z "$ECHO"; then
+if test "X${echo_test_string+set}" != Xset; then
+# find a string as large as possible, as long as the shell can cope with it
+  for cmd in 'sed 50q "[$]0"' 'sed 20q "[$]0"' 'sed 10q "[$]0"' 'sed 2q "[$]0"' 'echo test'; do
+    # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ...
+    if (echo_test_string="`eval $cmd`") 2>/dev/null &&
+       echo_test_string="`eval $cmd`" &&
+       (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null
+    then
+      break
+    fi
+  done
+fi
+
+if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+   echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+   test "X$echo_testing_string" = "X$echo_test_string"; then
+  :
+else
+  # The Solaris, AIX, and Digital Unix default echo programs unquote
+  # backslashes.  This makes it impossible to quote backslashes using
+  #   echo "$something" | sed 's/\\/\\\\/g'
+  #
+  # So, first we look for a working echo in the user's PATH.
+
+  # IFS is switched to PATH_SEPARATOR for the loop and restored afterwards.
+  IFS="${IFS=  }"; save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for dir in $PATH /usr/ucb; do
+    if (test -f $dir/echo || test -f $dir/echo$ac_exeext) &&
+       test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' &&
+       echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` &&
+       test "X$echo_testing_string" = "X$echo_test_string"; then
+      echo="$dir/echo"
+      break
+    fi
+  done
+  IFS="$save_ifs"
+
+  if test "X$echo" = Xecho; then
+    # We didn't find a better echo, so look for alternatives.
+    if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' &&
+       echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` &&
+       test "X$echo_testing_string" = "X$echo_test_string"; then
+      # This shell has a builtin print -r that does the trick.
+      echo='print -r'
+    elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) &&
+        test "X$CONFIG_SHELL" != X/bin/ksh; then
+      # If we have ksh, try running configure again with it.
+      ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh}
+      export ORIGINAL_CONFIG_SHELL
+      CONFIG_SHELL=/bin/ksh
+      export CONFIG_SHELL
+      exec $CONFIG_SHELL "[$]0" --no-reexec ${1+"[$]@"}
+    else
+      # Try using printf.
+      echo='printf %s\n'
+      if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+        echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+        test "X$echo_testing_string" = "X$echo_test_string"; then
+       # Cool, printf works
+       :
+      elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` &&
+          test "X$echo_testing_string" = 'X\t' &&
+          echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+          test "X$echo_testing_string" = "X$echo_test_string"; then
+       CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL
+       export CONFIG_SHELL
+       SHELL="$CONFIG_SHELL"
+       export SHELL
+       echo="$CONFIG_SHELL [$]0 --fallback-echo"
+      elif echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` &&
+          test "X$echo_testing_string" = 'X\t' &&
+          echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+          test "X$echo_testing_string" = "X$echo_test_string"; then
+       echo="$CONFIG_SHELL [$]0 --fallback-echo"
+      else
+       # maybe with a smaller string...
+       prev=:
+
+       for cmd in 'echo test' 'sed 2q "[$]0"' 'sed 10q "[$]0"' 'sed 20q "[$]0"' 'sed 50q "[$]0"'; do
+         if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null
+         then
+           break
+         fi
+         prev="$cmd"
+       done
+
+       if test "$prev" != 'sed 50q "[$]0"'; then
+         echo_test_string=`eval $prev`
+         export echo_test_string
+         exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "[$]0" ${1+"[$]@"}
+       else
+         # Oops.  We lost completely, so just stick with echo.
+         echo=echo
+       fi
+      fi
+    fi
+  fi
+fi
+fi
+
+# Copy echo and quote the copy suitably for passing to libtool from
+# the Makefile, instead of quoting the original, which is used later.
+ECHO=$echo
+if test "X$ECHO" = "X$CONFIG_SHELL [$]0 --fallback-echo"; then
+   ECHO="$CONFIG_SHELL \\\$\[$]0 --fallback-echo"
+fi
+
+AC_SUBST(ECHO)
+AC_DIVERT_POP
+])# _LT_AC_PROG_ECHO_BACKSLASH
+
+# _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
+#                           ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
+# ------------------------------------------------------------------
+# Build and run a program that dlopens itself and looks up its own symbol
+# "fnord".  Runs ACTION-IF-TRUE when the plain name is found,
+# ACTION-IF-TRUE-W-USCORE when only "_fnord" is found, ACTION-IF-FALSE when
+# neither lookup succeeds or the program cannot be built, and
+# ACTION-IF-CROSS-COMPILING when cross compiling (nothing is run then).
+AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF],
+[if test "$cross_compiling" = yes; then :
+  [$4]
+else
+  AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<EOF
+[#line __oline__ "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+#ifdef __cplusplus
+extern "C" void exit (int);
+#endif
+
+void fnord() { int i=42;}
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
+      /* dlclose (self); */
+    }
+
+    /* Return the status from main: exit has no declaration in C here. */
+    return status;
+}]
+EOF
+  if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) 2>/dev/null
+    lt_status=$?
+    # The last pattern also catches any exit code other than the three
+    # defined status values.
+    case x$lt_status in
+      x$lt_dlno_uscore) $1 ;;
+      x$lt_dlneed_uscore) $2 ;;
+      x$lt_dlunknown|x*) $3 ;;
+    esac
+  else :
+    # compilation failed
+    $3
+  fi
+fi
+rm -fr conftest*
+])# _LT_AC_TRY_DLOPEN_SELF
+
+# AC_LIBTOOL_DLOPEN_SELF
+# ----------------------
+# When enable_dlopen is "yes", find out which dlopen-style function the host
+# offers (lt_cv_dlopen) and the library option it needs (lt_cv_dlopen_libs),
+# then, for real dlopen, test whether a program and a statically linked
+# program can dlopen themselves.  Sets enable_dlopen, enable_dlopen_self and
+# enable_dlopen_self_static; all three are "unknown" when enable_dlopen was
+# not "yes" on entry.  lt_cv_dlopen_libs is prepended to LIBS for the self
+# tests, so it must hold linker options such as -ldl or -ldld.
+AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF],
+[if test "x$enable_dlopen" != xyes; then
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  cygwin* | mingw* | pw32*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+   ;;
+
+  *)
+    AC_CHECK_FUNC([shl_load],
+          [lt_cv_dlopen="shl_load"],
+      [AC_CHECK_LIB([dld], [shl_load],
+            [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"],
+       [AC_CHECK_FUNC([dlopen],
+             [lt_cv_dlopen="dlopen"],
+         [AC_CHECK_LIB([dl], [dlopen],
+               [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
+           [AC_CHECK_LIB([svld], [dlopen],
+                 [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
+             [AC_CHECK_LIB([dld], [dld_link],
+                   [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"])
+             ])
+           ])
+         ])
+       ])
+      ])
+    ;;
+  esac
+
+  if test "x$lt_cv_dlopen" != xno; then
+    enable_dlopen=yes
+  else
+    enable_dlopen=no
+  fi
+
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS="$CPPFLAGS"
+    AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl
+    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS="$LDFLAGS"
+    eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+    save_LIBS="$LIBS"
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    AC_CACHE_CHECK([whether a program can dlopen itself],
+         lt_cv_dlopen_self, [dnl
+         _LT_AC_TRY_DLOPEN_SELF(
+           lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
+           lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
+    ])
+
+    if test "x$lt_cv_dlopen_self" = xyes; then
+      LDFLAGS="$LDFLAGS $link_static_flag"
+      AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
+         lt_cv_dlopen_self_static, [dnl
+         _LT_AC_TRY_DLOPEN_SELF(
+           lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
+           lt_cv_dlopen_self_static=no,  lt_cv_dlopen_self_static=cross)
+      ])
+    fi
+
+    CPPFLAGS="$save_CPPFLAGS"
+    LDFLAGS="$save_LDFLAGS"
+    LIBS="$save_LIBS"
+    ;;
+  esac
+
+  case $lt_cv_dlopen_self in
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;
+  esac
+
+  case $lt_cv_dlopen_self_static in
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+])# AC_LIBTOOL_DLOPEN_SELF
+
+AC_DEFUN([_LT_AC_LTCONFIG_HACK],
+[AC_REQUIRE([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])dnl
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e s/^X//'
+sed_quote_subst='s/\([[\\"\\`$\\\\]]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\([[\\"\\`\\\\]]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Constants:
+rm="rm -f"
+
+# Global variables:
+default_ofile=libtool
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except M$VC,
+# which needs '.lib').
+libext=a
+ltmain="$ac_aux_dir/ltmain.sh"
+ofile="$default_ofile"
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+need_locks="$enable_libtool_lock"
+
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+
+# Set sane defaults for various variables
+test -z "$AR" && AR=ar
+test -z "$AR_FLAGS" && AR_FLAGS=cru
+test -z "$AS" && AS=as
+test -z "$CC" && CC=cc
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+test -z "$LD" && LD=ld
+test -z "$LN_S" && LN_S="ln -s"
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+test -z "$NM" && NM=nm
+test -z "$OBJDUMP" && OBJDUMP=objdump
+test -z "$RANLIB" && RANLIB=:
+test -z "$STRIP" && STRIP=:
+test -z "$ac_objext" && ac_objext=o
+
+if test x"$host" != x"$build"; then
+  ac_tool_prefix=${host_alias}-
+else
+  ac_tool_prefix=
+fi
+
+# Transform linux* to *-*-linux-gnu*, to support old configure scripts.
+case $host_os in
+linux-gnu*) ;;
+linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
+esac
+
+case $host_os in
+aix3*)
+  # AIX sometimes has problems with the GCC collect2 program.  For some
+  # reason, if we set the COLLECT_NAMES environment variable, the problems
+  # vanish in a puff of smoke.
+  if test "X${COLLECT_NAMES+set}" != Xset; then
+    COLLECT_NAMES=
+    export COLLECT_NAMES
+  fi
+  ;;
+esac
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+if test -n "$RANLIB"; then
+  case $host_os in
+  openbsd*)
+    old_postinstall_cmds="\$RANLIB -t \$oldlib~$old_postinstall_cmds"
+    ;;
+  *)
+    old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+fi
+
+# Allow CC to be a program name with arguments.
+set dummy $CC
+compiler="[$]2"
+
+AC_MSG_CHECKING([for objdir])
+rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+  objdir=.libs
+else
+  # MS-DOS does not allow filenames that begin with a dot.
+  objdir=_libs
+fi
+rmdir .libs 2>/dev/null
+AC_MSG_RESULT($objdir)
+
+
+AC_ARG_WITH(pic,
+[  --with-pic              try to use only PIC/non-PIC objects [default=use both]],
+pic_mode="$withval", pic_mode=default)
+test -z "$pic_mode" && pic_mode=default
+
+# We assume here that the value for lt_cv_prog_cc_pic will not be cached
+# in isolation, and that seeing it set (from the cache) indicates that
+# the associated values are set (in the cache) correctly too.
+AC_MSG_CHECKING([for $compiler option to produce PIC])
+AC_CACHE_VAL(lt_cv_prog_cc_pic,
+[ lt_cv_prog_cc_pic=
+  lt_cv_prog_cc_shlib=
+  lt_cv_prog_cc_wl=
+  lt_cv_prog_cc_static=
+  lt_cv_prog_cc_no_builtin=
+  lt_cv_prog_cc_can_build_shared=$can_build_shared
+
+  if test "$GCC" = yes; then
+    lt_cv_prog_cc_wl='-Wl,'
+    lt_cv_prog_cc_static='-static'
+
+    case $host_os in
+    aix*)
+      # Below is a dirty hack to force normal static linking with -ldl.
+      # The problem is that libdl is dynamically linked with both libc and
+      # libC (the AIX C++ library), which gcc obviously does not include in
+      # its list of libraries.  This causes undefined symbols with -static.
+      # This hack allows C programs to be linked with "-static -ldl", but
+      # we are not sure about C++ programs.
+      lt_cv_prog_cc_static="$lt_cv_prog_cc_static ${lt_cv_prog_cc_wl}-lC"
+      ;;
+    amigaos*)
+      # FIXME: we need at least 68020 code to build shared libraries, but
+      # adding the `-m68020' flag to GCC prevents building anything better,
+      # like `-m68040'.
+      lt_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4'
+      ;;
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      lt_cv_prog_cc_pic='-fno-common'
+      ;;
+    cygwin* | mingw* | pw32* | os2*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_cv_prog_cc_pic='-DDLL_EXPORT'
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+        lt_cv_prog_cc_pic=-Kconform_pic
+      fi
+      ;;
+    *)
+      lt_cv_prog_cc_pic='-fPIC'
+      ;;
+    esac
+  else
+    # PORTME Check for PIC flags for the system compiler.
+    case $host_os in
+    aix3* | aix4* | aix5*)
+      lt_cv_prog_cc_wl='-Wl,'
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_cv_prog_cc_static='-Bstatic'
+      else
+       lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      # Is there a better lt_cv_prog_cc_static that works with the bundled CC?
+      lt_cv_prog_cc_wl='-Wl,'
+      lt_cv_prog_cc_static="${lt_cv_prog_cc_wl}-a ${lt_cv_prog_cc_wl}archive"
+      lt_cv_prog_cc_pic='+Z'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      lt_cv_prog_cc_wl='-Wl,'
+      lt_cv_prog_cc_static='-non_shared'
+      # PIC (with -KPIC) is the default.
+      ;;
+
+    cygwin* | mingw* | pw32* | os2*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_cv_prog_cc_pic='-DDLL_EXPORT'
+      ;;
+
+    newsos6)
+      lt_cv_prog_cc_pic='-KPIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      # All OSF/1 code is PIC.
+      lt_cv_prog_cc_wl='-Wl,'
+      lt_cv_prog_cc_static='-non_shared'
+      ;;
+
+    sco3.2v5*)
+      lt_cv_prog_cc_pic='-Kpic'
+      lt_cv_prog_cc_static='-dn'
+      lt_cv_prog_cc_shlib='-belf'
+      ;;
+
+    solaris*)
+      lt_cv_prog_cc_pic='-KPIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      lt_cv_prog_cc_wl='-Wl,'
+      ;;
+
+    sunos4*)
+      lt_cv_prog_cc_pic='-PIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      lt_cv_prog_cc_wl='-Qoption ld '
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+      lt_cv_prog_cc_pic='-KPIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      lt_cv_prog_cc_wl='-Wl,'
+      ;;
+
+    uts4*)
+      lt_cv_prog_cc_pic='-pic'
+      lt_cv_prog_cc_static='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+       lt_cv_prog_cc_pic='-Kconform_pic'
+       lt_cv_prog_cc_static='-Bstatic'
+      fi
+      ;;
+
+    *)
+      lt_cv_prog_cc_can_build_shared=no
+      ;;
+    esac
+  fi
+])
+if test -z "$lt_cv_prog_cc_pic"; then
+  AC_MSG_RESULT([none])
+else
+  AC_MSG_RESULT([$lt_cv_prog_cc_pic])
+
+  # Check to make sure the pic_flag actually works.
+  AC_MSG_CHECKING([if $compiler PIC flag $lt_cv_prog_cc_pic works])
+  AC_CACHE_VAL(lt_cv_prog_cc_pic_works, [dnl
+    save_CFLAGS="$CFLAGS"
+    CFLAGS="$CFLAGS $lt_cv_prog_cc_pic -DPIC"
+    AC_TRY_COMPILE([], [], [dnl
+      case $host_os in
+      hpux9* | hpux10* | hpux11*)
+       # On HP-UX, both CC and GCC only warn that PIC is not supported... then
+       # they create non-PIC objects.  So, if there were any warnings, we
+       # assume that PIC is not supported.
+       if test -s conftest.err; then
+         lt_cv_prog_cc_pic_works=no
+       else
+         lt_cv_prog_cc_pic_works=yes
+       fi
+       ;;
+      *)
+       lt_cv_prog_cc_pic_works=yes
+       ;;
+      esac
+    ], [dnl
+      lt_cv_prog_cc_pic_works=no
+    ])
+    CFLAGS="$save_CFLAGS"
+  ])
+
+  if test "X$lt_cv_prog_cc_pic_works" = Xno; then
+    lt_cv_prog_cc_pic=
+    lt_cv_prog_cc_can_build_shared=no
+  else
+    lt_cv_prog_cc_pic=" $lt_cv_prog_cc_pic"
+  fi
+
+  AC_MSG_RESULT([$lt_cv_prog_cc_pic_works])
+fi
+
+# Check for any special shared library compilation flags.
+if test -n "$lt_cv_prog_cc_shlib"; then
+  AC_MSG_WARN([\`$CC' requires \`$lt_cv_prog_cc_shlib' to build shared libraries])
+  if echo "$old_CC $old_CFLAGS " | egrep -e "[[        ]]$lt_cv_prog_cc_shlib[[        ]]" >/dev/null; then :
+  else
+   AC_MSG_WARN([add \`$lt_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure])
+    lt_cv_prog_cc_can_build_shared=no
+  fi
+fi
+
+AC_MSG_CHECKING([if $compiler static flag $lt_cv_prog_cc_static works])
+AC_CACHE_VAL([lt_cv_prog_cc_static_works], [dnl
+  lt_cv_prog_cc_static_works=no
+  save_LDFLAGS="$LDFLAGS"
+  LDFLAGS="$LDFLAGS $lt_cv_prog_cc_static"
+  AC_TRY_LINK([], [], [lt_cv_prog_cc_static_works=yes])
+  LDFLAGS="$save_LDFLAGS"
+])
+
+# Belt *and* braces to stop my trousers falling down:
+test "X$lt_cv_prog_cc_static_works" = Xno && lt_cv_prog_cc_static=
+AC_MSG_RESULT([$lt_cv_prog_cc_static_works])
+
+pic_flag="$lt_cv_prog_cc_pic"
+special_shlib_compile_flags="$lt_cv_prog_cc_shlib"
+wl="$lt_cv_prog_cc_wl"
+link_static_flag="$lt_cv_prog_cc_static"
+no_builtin_flag="$lt_cv_prog_cc_no_builtin"
+can_build_shared="$lt_cv_prog_cc_can_build_shared"
+
+
+# Check to see if options -o and -c are simultaneously supported by compiler
+AC_MSG_CHECKING([if $compiler supports -c -o file.$ac_objext])
+AC_CACHE_VAL([lt_cv_compiler_c_o], [
+$rm -r conftest 2>/dev/null
+mkdir conftest
+cd conftest
+echo "int some_variable = 0;" > conftest.$ac_ext
+mkdir out
+# According to Tom Tromey, Ian Lance Taylor reported there are C compilers
+# that will create temporary files in the current directory regardless of
+# the output directory.  Thus, making CWD read-only will cause this test
+# to fail, enabling locking or at least warning the user not to do parallel
+# builds.
+chmod -w .
+save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -o out/conftest2.$ac_objext"
+compiler_c_o=no
+if { (eval echo configure:__oline__: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.$ac_objext; then
+  # The compiler can only warn and ignore the option if not recognized
+  # So say no if there are warnings
+  if test -s out/conftest.err; then
+    lt_cv_compiler_c_o=no
+  else
+    lt_cv_compiler_c_o=yes
+  fi
+else
+  # Append any errors to the config.log.
+  cat out/conftest.err 1>&AC_FD_CC
+  lt_cv_compiler_c_o=no
+fi
+CFLAGS="$save_CFLAGS"
+chmod u+w .
+$rm conftest* out/*
+rmdir out
+cd ..
+rmdir conftest
+$rm -r conftest 2>/dev/null
+])
+compiler_c_o=$lt_cv_compiler_c_o
+AC_MSG_RESULT([$compiler_c_o])
+
+if test x"$compiler_c_o" = x"yes"; then
+  # Check to see if we can write to a .lo
+  AC_MSG_CHECKING([if $compiler supports -c -o file.lo])
+  AC_CACHE_VAL([lt_cv_compiler_o_lo], [
+  lt_cv_compiler_o_lo=no
+  save_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -c -o conftest.lo"
+  save_objext="$ac_objext"
+  ac_objext=lo
+  AC_TRY_COMPILE([], [int some_variable = 0;], [dnl
+    # The compiler can only warn and ignore the option if not recognized
+    # So say no if there are warnings
+    if test -s conftest.err; then
+      lt_cv_compiler_o_lo=no
+    else
+      lt_cv_compiler_o_lo=yes
+    fi
+  ])
+  ac_objext="$save_objext"
+  CFLAGS="$save_CFLAGS"
+  ])
+  compiler_o_lo=$lt_cv_compiler_o_lo
+  AC_MSG_RESULT([$compiler_o_lo])
+else
+  compiler_o_lo=no
+fi
+
+# Check to see if we can do hard links to lock some files if needed
+hard_links="nottested"
+if test "$compiler_c_o" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  AC_MSG_CHECKING([if we can lock with hard links])
+  hard_links=yes
+  $rm conftest*
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  AC_MSG_RESULT([$hard_links])
+  if test "$hard_links" = no; then
+    AC_MSG_WARN([\`$CC' does not support \`-c -o', so \`make -j' may be unsafe])
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+if test "$GCC" = yes; then
+  # Check to see if options -fno-rtti -fno-exceptions are supported by compiler
+  AC_MSG_CHECKING([if $compiler supports -fno-rtti -fno-exceptions])
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  save_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.$ac_ext"
+  compiler_rtti_exceptions=no
+  AC_TRY_COMPILE([], [int some_variable = 0;], [dnl
+    # The compiler can only warn and ignore the option if not recognized
+    # So say no if there are warnings
+    if test -s conftest.err; then
+      compiler_rtti_exceptions=no
+    else
+      compiler_rtti_exceptions=yes
+    fi
+  ])
+  CFLAGS="$save_CFLAGS"
+  AC_MSG_RESULT([$compiler_rtti_exceptions])
+
+  if test "$compiler_rtti_exceptions" = "yes"; then
+    no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions'
+  else
+    no_builtin_flag=' -fno-builtin'
+  fi
+fi
+
+# See if the linker supports building shared libraries.
+AC_MSG_CHECKING([whether the linker ($LD) supports shared libraries])
+
+allow_undefined_flag=
+no_undefined_flag=
+need_lib_prefix=unknown
+need_version=unknown
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+archive_cmds=
+archive_expsym_cmds=
+old_archive_from_new_cmds=
+old_archive_from_expsyms_cmds=
+export_dynamic_flag_spec=
+whole_archive_flag_spec=
+thread_safe_flag_spec=
+hardcode_into_libs=no
+hardcode_libdir_flag_spec=
+hardcode_libdir_separator=
+hardcode_direct=no
+hardcode_minus_L=no
+hardcode_shlibpath_var=unsupported
+runpath_var=
+link_all_deplibs=unknown
+always_export_symbols=no
+export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols'
+# include_expsyms should be a list of space-separated symbols to be *always*
+# included in the symbol list
+include_expsyms=
+# exclude_expsyms can be an egrep regular expression of symbols to exclude
+# it will be wrapped by ` (' and `)$', so one must not match beginning or
+# end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+# as well as any symbol that contains `d'.
+exclude_expsyms="_GLOBAL_OFFSET_TABLE_"
+# Although _GLOBAL_OFFSET_TABLE_ is a valid C symbol name, most a.out
+# platforms (ab)use it in PIC code, but their linkers get confused if
+# the symbol is explicitly referenced.  Since portable code cannot
+# rely on this symbol name, it's probably fine to never include it in
+# preloaded symbol tables.
+extract_expsyms_cmds=
+
+case $host_os in
+cygwin* | mingw* | pw32*)
+  # FIXME: the MSVC++ port hasn't been tested in a loooong time
+  # When not using gcc, we currently assume that we are using
+  # Microsoft Visual C++.
+  if test "$GCC" != yes; then
+    with_gnu_ld=no
+  fi
+  ;;
+openbsd*)
+  with_gnu_ld=no
+  ;;
+esac
+
+ld_shlibs=yes
+if test "$with_gnu_ld" = yes; then
+  # If archive_cmds runs LD, not CC, wlarc should be empty
+  wlarc='${wl}'
+
+  # See if GNU ld supports shared libraries.
+  case $host_os in
+  aix3* | aix4* | aix5*)
+    # On AIX, the GNU linker is very broken
+    # Note: Check the GNU linker on AIX 5-IA64 when/if it becomes available.
+    ld_shlibs=no
+    cat <<EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.9.1, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to modify your PATH
+*** so that a non-GNU linker is found, and then restart.
+
+EOF
+    ;;
+
+  amigaos*)
+    archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_minus_L=yes
+
+    # Samuel A. Falvo II <kc5tja@dolphin.openprojects.net> reports
+    # that the semantics of dynamic libraries on AmigaOS, at least up
+    # to version 4, is to share data among multiple programs linked
+    # with the same dynamic library.  Since this doesn't match the
+    # behavior of shared libraries on other platforms, we cannot use
+    # them.
+    ld_shlibs=no
+    ;;
+
+  beos*)
+    if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+      allow_undefined_flag=unsupported
+      # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+      # support --undefined.  This deserves some investigation.  FIXME
+      archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+    else
+      ld_shlibs=no
+    fi
+    ;;
+
+  cygwin* | mingw* | pw32*)
+    # hardcode_libdir_flag_spec is actually meaningless, as there is
+    # no search path for DLLs.
+    hardcode_libdir_flag_spec='-L$libdir'
+    allow_undefined_flag=unsupported
+    always_export_symbols=yes
+
+    extract_expsyms_cmds='test -f $output_objdir/impgen.c || \
+      sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //;s/^# *$//; p; }" -e d < $''0 > $output_objdir/impgen.c~
+      test -f $output_objdir/impgen.exe || (cd $output_objdir && \
+      if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \
+      else $CC -o impgen impgen.c ; fi)~
+      $output_objdir/impgen $dir/$soroot > $output_objdir/$soname-def'
+
+    old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib'
+
+    # cygwin and mingw dlls have different entry points and sets of symbols
+    # to exclude.
+    # FIXME: what about values for MSVC?
+    dll_entry=__cygwin_dll_entry@12
+    dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~
+    case $host_os in
+    mingw*)
+      # mingw values
+      dll_entry=_DllMainCRTStartup@12
+      dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~
+      ;;
+    esac
+
+    # mingw and cygwin differ, and it's simplest to just exclude the union
+    # of the two symbol sets.
+    dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12
+
+    # recent cygwin and mingw systems supply a stub DllMain which the user
+    # can override, but on older systems we have to supply one (in ltdll.c)
+    if test "x$lt_cv_need_dllmain" = "xyes"; then
+      ltdll_obj='$output_objdir/$soname-ltdll.'"$ac_objext "
+      ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $''0 > $output_objdir/$soname-ltdll.c~
+       test -f $output_objdir/$soname-ltdll.$ac_objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~'
+    else
+      ltdll_obj=
+      ltdll_cmds=
+    fi
+
+    # Extract the symbol export list from an `--export-all' def file,
+    # then regenerate the def file from the symbol export list, so that
+    # the compiled dll only exports the symbol export list.
+    # Be careful not to strip the DATA tag left by newer dlltools.
+    export_symbols_cmds="$ltdll_cmds"'
+      $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~
+      sed -e "1,/EXPORTS/d" -e "s/ @ [[0-9]]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols'
+
+    # If the export-symbols file already is a .def file (1st line
+    # is EXPORTS), use it as is.
+    # If DATA tags from a recent dlltool are present, honour them!
+    archive_expsym_cmds='if test "x`sed 1q $export_symbols`" = xEXPORTS; then
+       cp $export_symbols $output_objdir/$soname-def;
+      else
+       echo EXPORTS > $output_objdir/$soname-def;
+       _lt_hint=1;
+       cat $export_symbols | while read symbol; do
+        set dummy \$symbol;
+        case \[$]# in
+          2) echo "   \[$]2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;;
+          4) echo "   \[$]2 \[$]3 \[$]4 ; " >> $output_objdir/$soname-def; _lt_hint=`expr \$_lt_hint - 1`;;
+          *) echo "     \[$]2 @ \$_lt_hint \[$]3 ; " >> $output_objdir/$soname-def;;
+        esac;
+        _lt_hint=`expr 1 + \$_lt_hint`;
+       done;
+      fi~
+      '"$ltdll_cmds"'
+      $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+      $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~
+      $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+      $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp --output-lib $output_objdir/$libname.dll.a~
+      $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags'
+    ;;
+
+  netbsd*)
+    if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+      archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+      wlarc=
+    else
+      archive_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      archive_expsym_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+    fi
+    ;;
+
+  solaris* | sysv5*)
+    if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then
+      ld_shlibs=no
+      cat <<EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+EOF
+    elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+    else
+      ld_shlibs=no
+    fi
+    ;;
+
+  sunos4*)
+    archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+    wlarc=
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  *)
+    if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+    else
+      ld_shlibs=no
+    fi
+    ;;
+  esac
+
+  if test "$ld_shlibs" = yes; then
+    runpath_var=LD_RUN_PATH
+    hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir'
+    export_dynamic_flag_spec='${wl}--export-dynamic'
+    case $host_os in
+    cygwin* | mingw* | pw32*)
+      # dlltool doesn't understand --whole-archive et al.
+      whole_archive_flag_spec=
+      ;;
+    *)
+      # ancient GNU ld didn't support --whole-archive et al.
+      if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then
+       whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+      else
+       whole_archive_flag_spec=
+      fi
+      ;;
+    esac
+  fi
+else
+  # PORTME fill in a description of your system's linker (not GNU ld)
+  case $host_os in
+  aix3*)
+    allow_undefined_flag=unsupported
+    always_export_symbols=yes
+    archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+    # Note: this linker hardcodes the directories in LIBPATH if there
+    # are no directories specified by -L.
+    hardcode_minus_L=yes
+    if test "$GCC" = yes && test -z "$link_static_flag"; then
+      # Neither direct hardcoding nor static linking is supported with a
+      # broken collect2.
+      hardcode_direct=unsupported
+    fi
+    ;;
+
+  aix4* | aix5*)
+    if test "$host_cpu" = ia64; then
+      # On IA64, the linker does run time linking by default, so we don't
+      # have to do anything special.
+      aix_use_runtimelinking=no
+      exp_sym_flag='-Bexport'
+      no_entry_flag=""
+    else
+      aix_use_runtimelinking=no
+
+      # Test if we are trying to use run time linking or normal
+      # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+      # need to do runtime linking.
+      case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*)
+       for ld_flag in $LDFLAGS; do
+         case $ld_flag in
+         *-brtl*)
+           aix_use_runtimelinking=yes
+           break
+         ;;
+         esac
+       done
+      esac
+
+      exp_sym_flag='-bexport'
+      no_entry_flag='-bnoentry'
+    fi
+
+    # When large executables or shared objects are built, AIX ld can
+    # have problems creating the table of contents.  If linking a library
+    # or program results in "error TOC overflow" add -mminimal-toc to
+    # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+    # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+    hardcode_direct=yes
+    archive_cmds=''
+    hardcode_libdir_separator=':'
+    if test "$GCC" = yes; then
+      case $host_os in aix4.[[012]]|aix4.[[012]].*)
+       collect2name=`${CC} -print-prog-name=collect2`
+       if test -f "$collect2name" && \
+         strings "$collect2name" | grep resolve_lib_name >/dev/null
+       then
+         # We have reworked collect2
+         hardcode_direct=yes
+       else
+         # We have old collect2
+         hardcode_direct=unsupported
+         # It fails to find uninstalled libraries when the uninstalled
+         # path is not listed in the libpath.  Setting hardcode_minus_L
+         # to unsupported forces relinking
+         hardcode_minus_L=yes
+         hardcode_libdir_flag_spec='-L$libdir'
+         hardcode_libdir_separator=
+       fi
+      esac
+
+      shared_flag='-shared'
+    else
+      # not using gcc
+      if test "$host_cpu" = ia64; then
+       shared_flag='${wl}-G'
+      else
+       if test "$aix_use_runtimelinking" = yes; then
+         shared_flag='${wl}-G'
+       else
+         shared_flag='${wl}-bM:SRE'
+       fi
+      fi
+    fi
+
+    # It seems that -bexpall can do strange things, so it is better to
+    # generate a list of symbols to export.
+    always_export_symbols=yes
+    if test "$aix_use_runtimelinking" = yes; then
+      # Warning - without using the other runtime loading flags (-brtl),
+      # -berok will link without error, but may produce a broken library.
+      allow_undefined_flag='-berok'
+      hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:/usr/lib:/lib'
+      archive_expsym_cmds="\$CC"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+    else
+      if test "$host_cpu" = ia64; then
+       hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+       allow_undefined_flag="-z nodefs"
+       archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"
+      else
+       hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib'
+       # Warning - without using the other run time loading flags,
+       # -berok will link without error, but may produce a broken library.
+       allow_undefined_flag='${wl}-berok'
+       # This is a bit strange, but is similar to how AIX traditionally builds
+       # its shared libraries.
+       archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname'
+      fi
+    fi
+    ;;
+
+  amigaos*)
+    archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_minus_L=yes
+    # see comment about different semantics on the GNU ld section
+    ld_shlibs=no
+    ;;
+
+  cygwin* | mingw* | pw32*)
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    # hardcode_libdir_flag_spec is actually meaningless, as there is
+    # no search path for DLLs.
+    hardcode_libdir_flag_spec=' '
+    allow_undefined_flag=unsupported
+    # Tell ltmain to make .lib files, not .a files.
+    libext=lib
+    # FIXME: Setting linknames here is a bad hack.
+    archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames='
+    # The linker will automatically build a .lib file if we build a DLL.
+    old_archive_from_new_cmds='true'
+    # FIXME: Should let the user specify the lib program.
+    old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs'
+    fix_srcfile_path='`cygpath -w "$srcfile"`'
+    ;;
+
+  darwin* | rhapsody*)
+    case "$host_os" in
+    rhapsody* | darwin1.[[012]])
+      allow_undefined_flag='-undefined suppress'
+      ;;
+    *) # Darwin 1.3 on
+      allow_undefined_flag='-flat_namespace -undefined suppress'
+      ;;
+    esac
+    # FIXME: Relying on posixy $() will cause problems for
+    #        cross-compilation, but unfortunately the echo tests do not
+    #        yet detect zsh echo's removal of \ escapes.  Also zsh mangles
+    #       `"' quotes if we put them in here... so don't!
+    archive_cmds='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs && $CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib ${lib}-master.o $deplibs$linker_flags $(test .$module != .yes && echo -install_name $rpath/$soname $verstring)'
+    # We need to add '_' to the symbols in $export_symbols first
+    #archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    whole_archive_flag_spec='-all_load $convenience'
+    ;;
+
+  freebsd1*)
+    ld_shlibs=no
+    ;;
+
+  # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+  # support.  Future versions do this automatically, but an explicit c++rt0.o
+  # does not break anything, and helps significantly (at the cost of a little
+  # extra space).
+  freebsd2.2*)
+    archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+  freebsd2*)
+    archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_direct=yes
+    hardcode_minus_L=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+  freebsd*)
+    archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  hpux9* | hpux10* | hpux11*)
+    case $host_os in
+    hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;;
+    *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;;
+    esac
+    hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+    hardcode_libdir_separator=:
+    hardcode_direct=yes
+    hardcode_minus_L=yes # Not in the search PATH, but as the default
+                        # location of the library.
+    export_dynamic_flag_spec='${wl}-E'
+    ;;
+
+  irix5* | irix6* | nonstopux*)
+    if test "$GCC" = yes; then
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    else
+      archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+      hardcode_libdir_flag_spec='-rpath $libdir'
+    fi
+    hardcode_libdir_separator=:
+    link_all_deplibs=yes
+    ;;
+
+  netbsd*)
+    if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
+    else
+      archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
+    fi
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  newsos6)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_direct=yes
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    hardcode_libdir_separator=:
+    hardcode_shlibpath_var=no
+    ;;
+
+  openbsd*)
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+      archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+      hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+      export_dynamic_flag_spec='${wl}-E'
+    else
+      case "$host_os" in
+      openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*)
+       archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+       hardcode_libdir_flag_spec='-R$libdir'
+        ;;
+      *)
+        archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+        hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+        ;;
+      esac
+    fi
+    ;;
+
+  os2*)
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_minus_L=yes
+    allow_undefined_flag=unsupported
+    archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+    old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+    ;;
+
+  osf3*)
+    if test "$GCC" = yes; then
+      allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+      archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+    else
+      allow_undefined_flag=' -expect_unresolved \*'
+      archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+    fi
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    hardcode_libdir_separator=:
+    ;;
+
+  osf4* | osf5*)       # as osf3* with the addition of -msym flag
+    # Tru64/OSF 4 and 5: build shared libraries either through GCC, with
+    # linker options passed via ${wl}, or directly with $LD.
+    if test "$GCC" = yes; then
+      allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+      archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    else
+      allow_undefined_flag=' -expect_unresolved \*'
+      archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+      # With an export list: write one -exported_symbol line per symbol plus
+      # -hidden into $lib.exp, link with -input $lib.exp, then remove it.
+      # The registry lives in ${output_objdir}, the same directory that
+      # archive_cmds uses, so both link modes update one so_locations file.
+      archive_expsym_cmds='for i in `cat $export_symbols`; do printf "-exported_symbol " >> $lib.exp; echo "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~
+      $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~$rm $lib.exp'
+
+      #Both c and cxx compiler support -rpath directly
+      hardcode_libdir_flag_spec='-rpath $libdir'
+    fi
+    hardcode_libdir_separator=:
+    ;;
+
+  sco3.2v5*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_shlibpath_var=no
+    runpath_var=LD_RUN_PATH
+    hardcode_runpath_var=yes
+    export_dynamic_flag_spec='${wl}-Bexport'
+    ;;
+
+  solaris*)
+    # gcc --version < 3.0 without binutils cannot create self contained
+    # shared libraries reliably, requiring libgcc.a to resolve some of
+    # the object symbols generated in some cases.  Libraries that use
+    # assert need libgcc.a to resolve __eprintf, for example.  Linking
+    # a copy of libgcc.a into every shared library to guarantee resolving
+    # such symbols causes other problems:  According to Tim Van Holder
+    # <tim.van.holder@pandora.be>, C++ libraries end up with a separate
+    # (to the application) exception stack for one thing.
+    no_undefined_flag=' -z defs'
+    if test "$GCC" = yes; then
+      case `$CC --version 2>/dev/null` in
+      [[12]].*)
+       cat <<EOF 1>&2
+
+*** Warning: Releases of GCC earlier than version 3.0 cannot reliably
+*** create self contained shared libraries on Solaris systems, without
+*** introducing a dependency on libgcc.a.  Therefore, libtool is disabling
+*** -no-undefined support, which will at least allow you to build shared
+*** libraries.  However, you may find that when you link such libraries
+*** into an application without using GCC, you have to manually add
+*** \`gcc --print-libgcc-file-name\` to the link command.  We urge you to
+*** upgrade to a newer version of GCC.  Another option is to rebuild your
+*** current GCC to use the GNU linker from GNU binutils 2.9.1 or newer.
+
+EOF
+        no_undefined_flag=
+       ;;
+      esac
+    fi
+    # $CC -shared without GNU ld will not create a library from C++
+    # object files and a static libstdc++, better avoid it by now
+    archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+               $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_shlibpath_var=no
+    case $host_os in
+    solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+    *) # Supported since Solaris 2.6 (maybe 2.5.1?)
+      whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;;
+    esac
+    link_all_deplibs=yes
+    ;;
+
+  sunos4*)
+    if test "x$host_vendor" = xsequent; then
+      # Use $CC to link under sequent, because it throws in some extra .o
+      # files that make .init and .fini sections work.
+      archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+    else
+      archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+    fi
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_direct=yes
+    hardcode_minus_L=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  sysv4)
+    case $host_vendor in
+      sni)
+        archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+        hardcode_direct=yes # is this really true???
+        ;;
+      siemens)
+        ## LD is ld it makes a PLAMLIB
+        ## CC just makes a GrossModule.
+        archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+        reload_cmds='$CC -r -o $output$reload_objs'
+        hardcode_direct=no
+        ;;
+      motorola)
+        archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+        hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+        ;;
+    esac
+    runpath_var='LD_RUN_PATH'
+    hardcode_shlibpath_var=no
+    ;;
+
+  sysv4.3*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_shlibpath_var=no
+    export_dynamic_flag_spec='-Bexport'
+    ;;
+
+  sysv5*)
+    no_undefined_flag=' -z text'
+    # $CC -shared without GNU ld will not create a library from C++
+    # object files and a static libstdc++, better avoid it by now
+    archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+               $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+    hardcode_libdir_flag_spec=
+    hardcode_shlibpath_var=no
+    runpath_var='LD_RUN_PATH'
+    ;;
+
+  uts4*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_shlibpath_var=no
+    ;;
+
+  dgux*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_shlibpath_var=no
+    ;;
+
+  sysv4*MP*)
+    if test -d /usr/nec; then
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_shlibpath_var=no
+      runpath_var=LD_RUN_PATH
+      hardcode_runpath_var=yes
+      ld_shlibs=yes
+    fi
+    ;;
+
+  sysv4.2uw2*)
+    archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_direct=yes
+    hardcode_minus_L=no
+    hardcode_shlibpath_var=no
+    hardcode_runpath_var=yes
+    runpath_var=LD_RUN_PATH
+    ;;
+
+  sysv5uw7* | unixware7*)
+    no_undefined_flag='${wl}-z ${wl}text'
+    if test "$GCC" = yes; then
+      archive_cmds='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+    else
+      archive_cmds='$CC -G ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+    fi
+    runpath_var='LD_RUN_PATH'
+    hardcode_shlibpath_var=no
+    ;;
+
+  *)
+    ld_shlibs=no
+    ;;
+  esac
+fi
+AC_MSG_RESULT([$ld_shlibs])
+test "$ld_shlibs" = no && can_build_shared=no
+
+# Check hardcoding attributes.
+# Sets hardcode_action to one of:
+#   relink      - a libdir flag or runpath variable exists and neither
+#                 hardcode_direct nor hardcode_minus_L is "no"
+#   immediate   - a libdir flag or runpath variable exists and at least
+#                 one of hardcode_direct / hardcode_minus_L is "no"
+#   unsupported - neither hardcode_libdir_flag_spec nor runpath_var is set
+AC_MSG_CHECKING([how to hardcode library paths into programs])
+hardcode_action=
+if test -n "$hardcode_libdir_flag_spec" || \
+   test -n "$runpath_var"; then
+
+  # We can hardcode nonexistent directories.
+  if test "$hardcode_direct" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$hardcode_shlibpath_var" != no &&
+     test "$hardcode_minus_L" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action=unsupported
+fi
+AC_MSG_RESULT([$hardcode_action])
+
+# Decide how libraries may be stripped.  Both commands stay empty unless
+# $STRIP is set and identifies itself as GNU strip.
+striplib=
+old_striplib=
+AC_MSG_CHECKING([whether stripping libraries is possible])
+if test -z "$STRIP"; then
+  # No strip program configured at all.
+  AC_MSG_RESULT([no])
+elif $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then
+  # GNU strip: static archives lose debug info only, shared libraries
+  # lose every symbol that is not needed.
+  old_striplib="$STRIP --strip-debug"
+  striplib="$STRIP --strip-unneeded"
+  AC_MSG_RESULT([yes])
+else
+  AC_MSG_RESULT([no])
+fi
+
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+# PORTME Fill in your ld.so characteristics
+AC_MSG_CHECKING([dynamic linker characteristics])
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+
+case $host_os in
+aix3*)
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}.so$major'
+  ;;
+
+aix4* | aix5*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}.so$major ${libname}${release}.so$versuffix $libname.so'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[[01]] | aix4.[[01]].*)
+       if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+            echo ' yes '
+            echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
+         :
+       else
+         can_build_shared=no
+       fi
+       ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can
+    # not hardcode correct soname into executable. Probably we can
+    # add versioning support to collect2, so additional links can
+    # be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}.so$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  hardcode_into_libs=yes
+  ;;
+
+amigaos*)
+  library_names_spec='$libname.ixlibrary $libname.a'
+  # Create ${libname}_ixlibrary.a entries in /sys/libs.
+  finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
+  ;;
+
+beos*)
+  library_names_spec='${libname}.so'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi4*)
+  version_type=linux
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  export_dynamic_flag_spec=-rdynamic
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32*)
+  # Windows-style hosts: the DLL naming scheme depends on whether GCC is
+  # the compiler and on the exact host_os.  In every branch the dots of
+  # ${release} are turned into dashes for the DLL file name.
+  version_type=windows
+  need_version=no
+  need_lib_prefix=no
+  case $GCC,$host_os in
+  yes,cygwin*)
+    library_names_spec='$libname.dll.a'
+    soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll'
+    postinstall_cmds='dlpath=`bash 2>&1 -c '\''. $dir/${file}i;echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog .libs/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`bash 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $rm \$dlpath'
+    ;;
+  yes,mingw*)
+    library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll'
+    sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | sed -e "s/^libraries://" -e "s/;/ /g" -e "s,=/,/,g"`
+    ;;
+  yes,pw32*)
+    # The square brackets around the dot must be doubled: m4 strips one
+    # level of quoting, and a bare dot in the sed pattern would match every
+    # character of the release string instead of only the literal dots.
+    library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll'
+    ;;
+  *)
+    library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll $libname.lib'
+    ;;
+  esac
+  dynamic_linker='Win32 ld.exe'
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  # FIXME: Relying on posixy $() will cause problems for
+  #        cross-compilation, but unfortunately the echo tests do not
+  #        yet detect zsh echo's removal of \ escapes.
+  library_names_spec='${libname}${release}${versuffix}.$(test .$module = .yes && echo so || echo dylib) ${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib) ${libname}.$(test .$module = .yes && echo so || echo dylib)'
+  soname_spec='${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib)'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  ;;
+
+freebsd1*)
+  dynamic_linker=no
+  ;;
+
+freebsd*)
+  objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout`
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in
+  freebsd2*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  *)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  dynamic_linker="$host_os dld.sl"
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  shlibpath_var=SHLIB_PATH
+  shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+  library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl'
+  soname_spec='${libname}${release}.sl$major'
+  # HP-UX runs *really* slowly unless shared libraries are mode 555.
+  postinstall_cmds='chmod 555 $lib'
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)          version_type=irix ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}.so$major'
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so $libname.so'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*)
+  dynamic_linker=no
+  ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+
+  # Find out which ABI we are using (multilib Linux x86_64 hack).
+  libsuff=
+  case "$host_cpu" in
+  x86_64*|s390x*)
+    echo '[#]line __oline__ "configure"' > conftest.$ac_ext
+    if AC_TRY_EVAL(ac_compile); then
+      case `/usr/bin/file conftest.$ac_objext` in
+      *64-bit*)
+        libsuff=64
+        ;;
+      esac
+    fi
+    rm -rf conftest*
+    ;;
+  *)
+    ;;
+  esac
+  sys_lib_dlsearch_path_spec="/lib${libsuff} /usr/lib${libsuff}"
+  sys_lib_search_path_spec="/lib${libsuff} /usr/lib${libsuff} /usr/local/lib${libsuff}"
+  ;;
+
+netbsd*)
+  # NetBSD: pick library naming for the a.out or the ELF object format.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  # Feed the bare token __ELF__ through the preprocessor.  If it comes out
+  # unchanged the macro is not predefined, so the grep succeeds and the
+  # first branch is the a.out case; otherwise the else branch is ELF.
+  if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+    # a.out: no soname; ldconfig -m is run on the install directory.
+    library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    # ELF: versioned name, major-version soname and unversioned link names.
+    library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so'
+    soname_spec='${libname}${release}.so$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+openbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case "$host_os" in
+    openbsd2.[[89]] | openbsd2.[[89]].*)
+      shlibpath_overrides_runpath=no
+      ;;
+    *)
+      shlibpath_overrides_runpath=yes
+      ;;
+    esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+os2*)
+  libname_spec='$name'
+  need_lib_prefix=no
+  library_names_spec='$libname.dll $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_version=no
+  soname_spec='${libname}${release}.so$major'
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  hardcode_into_libs=yes
+  ;;
+
+sco3.2v5*)
+  version_type=osf
+  soname_spec='${libname}${release}.so$major'
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+solaris*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      export_dynamic_flag_spec='${wl}-Blargedynsym'
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+uts4*)
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+dgux*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux
+    library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so'
+    soname_spec='$libname.so.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+AC_MSG_RESULT([$dynamic_linker])
+test "$dynamic_linker" = no && can_build_shared=no
+
+# Report the final consequences.
+AC_MSG_CHECKING([if libtool supports shared libraries])
+AC_MSG_RESULT([$can_build_shared])
+
+# Final decision on shared libraries: forced off when the earlier checks
+# concluded that they cannot be built.
+AC_MSG_CHECKING([whether to build shared libraries])
+test "$can_build_shared" = "no" && enable_shared=no
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case "$host_os" in
+aix3*)
+  test "$enable_shared" = yes && enable_static=no
+  if test -n "$RANLIB"; then
+    archive_cmds="$archive_cmds~\$RANLIB \$lib"
+    postinstall_cmds='$RANLIB $lib'
+  fi
+  ;;
+
+# AIX 5 is handled together with AIX 4 in the dynamic linker section and
+# gets the same .a shared library name when run time linking is off, so
+# the static archive would clash there as well.
+aix4* | aix5*)
+  if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+    test "$enable_shared" = yes && enable_static=no
+  fi
+  ;;
+esac
+AC_MSG_RESULT([$enable_shared])
+
+AC_MSG_CHECKING([whether to build static libraries])
+# Make sure either enable_shared or enable_static is yes.
+test "$enable_shared" = yes || enable_static=yes
+AC_MSG_RESULT([$enable_static])
+
+# Adjust enable_fast_install.  It is left untouched unless one of the
+# conditions below applies.
+case "$hardcode_action" in
+relink)
+  # Fast installation is not supported
+  enable_fast_install=no
+  ;;
+*)
+  if test "$shlibpath_overrides_runpath" = yes || test "$enable_shared" = no; then
+    # Fast installation is not necessary
+    enable_fast_install=needless
+  fi
+  ;;
+esac
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+AC_LIBTOOL_DLOPEN_SELF
+
+# Decide whether -lc has to be added explicitly when linking shared
+# libraries.  The probe only runs for GCC with shared libraries enabled and
+# an archive_cmds that is a single command starting with $CC: it compiles a
+# dummy object, runs archive_cmds with -v and greps the output for " -lc ".
+# When the probe is not run, or the dummy compile fails, need_lc falls back
+# to yes.
+if test "$enable_shared" = yes && test "$GCC" = yes; then
+  case $archive_cmds in
+  *'~'*)
+    # FIXME: we may have to deal with multi-command sequences.
+    ;;
+  '$CC '*)
+    # Test whether the compiler implicitly links with -lc since on some
+    # systems, -lgcc has to come before -lc. If gcc already passes -lc
+    # to ld, don't add -lc before -lgcc.
+    AC_MSG_CHECKING([whether -lc should be explicitly linked in])
+    AC_CACHE_VAL([lt_cv_archive_cmds_need_lc],
+    [$rm conftest*
+    echo 'static int dummy;' > conftest.$ac_ext
+
+    if AC_TRY_EVAL(ac_compile); then
+      soname=conftest
+      lib=conftest
+      libobjs=conftest.$ac_objext
+      deplibs=
+      wl=$lt_cv_prog_cc_wl
+      compiler_flags=-v
+      linker_flags=-v
+      verstring=
+      output_objdir=.
+      libname=conftest
+      save_allow_undefined_flag=$allow_undefined_flag
+      allow_undefined_flag=
+      if AC_TRY_EVAL(archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1)
+      then
+       lt_cv_archive_cmds_need_lc=no
+      else
+       lt_cv_archive_cmds_need_lc=yes
+      fi
+      allow_undefined_flag=$save_allow_undefined_flag
+    else
+      cat conftest.err 1>&5
+    fi
+    $rm conftest*])
+    AC_MSG_RESULT([$lt_cv_archive_cmds_need_lc])
+    ;;
+  esac
+fi
+need_lc=${lt_cv_archive_cmds_need_lc-yes}
+
+# The fallback below should only fire when bootstrapping the
+# libtool distribution, otherwise you forgot to ship ltmain.sh
+# with your package, and you will get complaints that there are
+# no rules to generate ltmain.sh.
+test -f "$ltmain" || {
+  # If there is no Makefile yet, we rely on a make rule to execute
+  # `config.status --recheck' to rerun these tests and create the
+  # libtool script then.
+  test -f Makefile && make "$ltmain"
+}
+
+if test -f "$ltmain"; then
+  trap "$rm \"${ofile}T\"; exit 1" 1 2 15
+  $rm -f "${ofile}T"
+
+  echo creating $ofile
+
+  # Now quote all the things that may contain metacharacters while being
+  # careful not to overquote the AC_SUBSTed values.  We take copies of the
+  # variables and quote the copies for generation of the libtool script.
+  for var in echo old_CC old_CFLAGS SED \
+    AR AR_FLAGS CC LD LN_S NM SHELL \
+    reload_flag reload_cmds wl \
+    pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \
+    thread_safe_flag_spec whole_archive_flag_spec libname_spec \
+    library_names_spec soname_spec \
+    RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \
+    old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \
+    postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \
+    old_striplib striplib file_magic_cmd export_symbols_cmds \
+    deplibs_check_method allow_undefined_flag no_undefined_flag \
+    finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \
+    global_symbol_to_c_name_address \
+    hardcode_libdir_flag_spec hardcode_libdir_separator  \
+    sys_lib_search_path_spec sys_lib_dlsearch_path_spec \
+    compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do
+
+    case $var in
+    reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \
+    old_postinstall_cmds | old_postuninstall_cmds | \
+    export_symbols_cmds | archive_cmds | archive_expsym_cmds | \
+    extract_expsyms_cmds | old_archive_from_expsyms_cmds | \
+    postinstall_cmds | postuninstall_cmds | \
+    finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec)
+      # Double-quote double-evaled strings.
+      eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\""
+      ;;
+    *)
+      eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\""
+      ;;
+    esac
+  done
+
+  cat <<__EOF__ > "${ofile}T"
+#! $SHELL
+
+# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# A sed that does not truncate output.
+SED=$lt_SED
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="${SED} -e s/^X//"
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+# ### BEGIN LIBTOOL CONFIG
+
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+
+# Shell to use when invoking shell scripts.
+SHELL=$lt_SHELL
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$need_lc
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# The host system.
+host_alias=$host_alias
+host=$host
+
+# An echo program that does not interpret backslashes.
+echo=$lt_echo
+
+# The archiver.
+AR=$lt_AR
+AR_FLAGS=$lt_AR_FLAGS
+
+# The default C compiler.
+CC=$lt_CC
+
+# Is the compiler the GNU C compiler?
+with_gcc=$GCC
+
+# The linker used to build libraries.
+LD=$lt_LD
+
+# Whether we need hard or soft links.
+LN_S=$lt_LN_S
+
+# A BSD-compatible nm program.
+NM=$lt_NM
+
+# A symbol stripping program
+STRIP=$STRIP
+
+# Used to examine libraries when file_magic_cmd begins "file"
+MAGIC_CMD=$MAGIC_CMD
+
+# Used on cygwin: DLL creation program.
+DLLTOOL="$DLLTOOL"
+
+# Used on cygwin: object dumper.
+OBJDUMP="$OBJDUMP"
+
+# Used on cygwin: assembler.
+AS="$AS"
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag
+reload_cmds=$lt_reload_cmds
+
+# How to pass a linker flag through the compiler.
+wl=$lt_wl
+
+# Object file suffix (normally "o").
+objext="$ac_objext"
+
+# Old archive suffix (normally "a").
+libext="$libext"
+
+# Executable file suffix (normally "").
+exeext="$exeext"
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_pic_flag
+pic_mode=$pic_mode
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_compiler_c_o
+
+# Can we write directly to a .lo ?
+compiler_o_lo=$lt_compiler_o_lo
+
+# Must we lock files when doing compilation ?
+need_locks=$lt_need_locks
+
+# Do we need the lib prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_link_static_flag
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_no_builtin_flag
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec
+
+# Compiler flag to generate thread-safe objects.
+thread_safe_flag_spec=$lt_thread_safe_flag_spec
+
+# Library versioning type.
+version_type=$version_type
+
+# Format of library name prefix.
+libname_spec=$lt_libname_spec
+
+# List of archive names.  First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME.
+library_names_spec=$lt_library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$lt_soname_spec
+
+# Commands used to build and install an old-style archive.
+RANLIB=$lt_RANLIB
+old_archive_cmds=$lt_old_archive_cmds
+old_postinstall_cmds=$lt_old_postinstall_cmds
+old_postuninstall_cmds=$lt_old_postuninstall_cmds
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds
+
+# Commands used to build and install a shared archive.
+archive_cmds=$lt_archive_cmds
+archive_expsym_cmds=$lt_archive_expsym_cmds
+postinstall_cmds=$lt_postinstall_cmds
+postuninstall_cmds=$lt_postuninstall_cmds
+
+# Commands to strip libraries.
+old_striplib=$lt_old_striplib
+striplib=$lt_striplib
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$lt_deplibs_check_method
+
+# Command to use when deplibs_check_method == file_magic.
+file_magic_cmd=$lt_file_magic_cmd
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag
+
+# Flag that forces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$lt_finish_cmds
+
+# Same as above, but a single script fragment to be evaled but not shown.
+finish_eval=$lt_finish_eval
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$lt_global_symbol_pipe
+
+# Transform the output of nm in a proper C declaration
+global_symbol_to_cdecl=$lt_global_symbol_to_cdecl
+
+# Transform the output of nm in a C name address pair
+global_symbol_to_c_name_address=$lt_global_symbol_to_c_name_address
+
+# This is the shared library runtime path variable.
+runpath_var=$runpath_var
+
+# This is the shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator
+
+# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct=$hardcode_direct
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L=$hardcode_minus_L
+
+# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
+# the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var
+
+# Variables whose values should be saved in libtool wrapper scripts and
+# restored at relink time.
+variables_saved_for_relink="$variables_saved_for_relink"
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs
+
+# Compile-time system search path for libraries
+sys_lib_search_path_spec=$lt_sys_lib_search_path_spec
+
+# Run-time system search path for libraries
+sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
+
+# Fix the shell variable \$srcfile for the compiler.
+fix_srcfile_path="$fix_srcfile_path"
+
+# Set to yes if exported symbols are required.
+always_export_symbols=$always_export_symbols
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$lt_extract_expsyms_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms
+
+# ### END LIBTOOL CONFIG
+
+__EOF__
+
+  case $host_os in
+  aix3*)
+    cat <<\EOF >> "${ofile}T"
+
+# AIX sometimes has problems with the GCC collect2 program.  For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+  COLLECT_NAMES=
+  export COLLECT_NAMES
+fi
+EOF
+    ;;
+  esac
+
+  case $host_os in
+  cygwin* | mingw* | pw32* | os2*)
+    cat <<'EOF' >> "${ofile}T"
+      # This is a source program that is used to create dlls on Windows
+      # Don't remove nor modify the starting and closing comments
+# /* ltdll.c starts here */
+# #define WIN32_LEAN_AND_MEAN
+# #include <windows.h>
+# #undef WIN32_LEAN_AND_MEAN
+# #include <stdio.h>
+#
+# #ifndef __CYGWIN__
+# #  ifdef __CYGWIN32__
+# #    define __CYGWIN__ __CYGWIN32__
+# #  endif
+# #endif
+#
+# #ifdef __cplusplus
+# extern "C" {
+# #endif
+# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved);
+# #ifdef __cplusplus
+# }
+# #endif
+#
+# #ifdef __CYGWIN__
+# #include <cygwin/cygwin_dll.h>
+# DECLARE_CYGWIN_DLL( DllMain );
+# #endif
+# HINSTANCE __hDllInstance_base;
+#
+# BOOL APIENTRY
+# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved)
+# {
+#   __hDllInstance_base = hInst;
+#   return TRUE;
+# }
+# /* ltdll.c ends here */
+       # This is a source program that is used to create import libraries
+       # on Windows for dlls which lack them. Don't remove nor modify the
+       # starting and closing comments
+# /* impgen.c starts here */
+# /*   Copyright (C) 1999-2000 Free Software Foundation, Inc.
+#
+#  This file is part of GNU libtool.
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#  */
+#
+# #include <stdio.h>           /* for printf() */
+# #include <unistd.h>          /* for open(), lseek(), read() */
+# #include <fcntl.h>           /* for O_RDONLY, O_BINARY */
+# #include <string.h>          /* for strdup() */
+#
+# /* O_BINARY isn't required (or even defined sometimes) under Unix */
+# #ifndef O_BINARY
+# #define O_BINARY 0
+# #endif
+#
+# static unsigned int
+# pe_get16 (fd, offset)
+#      int fd;
+#      int offset;
+# {
+#   unsigned char b[2];
+#   lseek (fd, offset, SEEK_SET);
+#   read (fd, b, 2);
+#   return b[0] + (b[1]<<8);
+# }
+#
+# static unsigned int
+# pe_get32 (fd, offset)
+#     int fd;
+#     int offset;
+# {
+#   unsigned char b[4];
+#   lseek (fd, offset, SEEK_SET);
+#   read (fd, b, 4);
+#   return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# static unsigned int
+# pe_as32 (ptr)
+#      void *ptr;
+# {
+#   unsigned char *b = ptr;
+#   return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# int
+# main (argc, argv)
+#     int argc;
+#     char *argv[];
+# {
+#     int dll;
+#     unsigned long pe_header_offset, opthdr_ofs, num_entries, i;
+#     unsigned long export_rva, export_size, nsections, secptr, expptr;
+#     unsigned long name_rvas, nexp;
+#     unsigned char *expdata, *erva;
+#     char *filename, *dll_name;
+#
+#     filename = argv[1];
+#
+#     dll = open(filename, O_RDONLY|O_BINARY);
+#     if (dll < 1)
+#      return 1;
+#
+#     dll_name = filename;
+#
+#     for (i=0; filename[i]; i++)
+#      if (filename[i] == '/' || filename[i] == '\\'  || filename[i] == ':')
+#          dll_name = filename + i +1;
+#
+#     pe_header_offset = pe_get32 (dll, 0x3c);
+#     opthdr_ofs = pe_header_offset + 4 + 20;
+#     num_entries = pe_get32 (dll, opthdr_ofs + 92);
+#
+#     if (num_entries < 1) /* no exports */
+#      return 1;
+#
+#     export_rva = pe_get32 (dll, opthdr_ofs + 96);
+#     export_size = pe_get32 (dll, opthdr_ofs + 100);
+#     nsections = pe_get16 (dll, pe_header_offset + 4 +2);
+#     secptr = (pe_header_offset + 4 + 20 +
+#            pe_get16 (dll, pe_header_offset + 4 + 16));
+#
+#     expptr = 0;
+#     for (i = 0; i < nsections; i++)
+#     {
+#      char sname[8];
+#      unsigned long secptr1 = secptr + 40 * i;
+#      unsigned long vaddr = pe_get32 (dll, secptr1 + 12);
+#      unsigned long vsize = pe_get32 (dll, secptr1 + 16);
+#      unsigned long fptr = pe_get32 (dll, secptr1 + 20);
+#      lseek(dll, secptr1, SEEK_SET);
+#      read(dll, sname, 8);
+#      if (vaddr <= export_rva && vaddr+vsize > export_rva)
+#      {
+#          expptr = fptr + (export_rva - vaddr);
+#          if (export_rva + export_size > vaddr + vsize)
+#              export_size = vsize - (export_rva - vaddr);
+#          break;
+#      }
+#     }
+#
+#     expdata = (unsigned char*)malloc(export_size);
+#     lseek (dll, expptr, SEEK_SET);
+#     read (dll, expdata, export_size);
+#     erva = expdata - export_rva;
+#
+#     nexp = pe_as32 (expdata+24);
+#     name_rvas = pe_as32 (expdata+32);
+#
+#     printf ("EXPORTS\n");
+#     for (i = 0; i<nexp; i++)
+#     {
+#      unsigned long name_rva = pe_as32 (erva+name_rvas+i*4);
+#      printf ("\t%s @ %ld ;\n", erva+name_rva, 1+ i);
+#     }
+#
+#     return 0;
+# }
+# /* impgen.c ends here */
+
+EOF
+    ;;
+  esac
+
+  # We use sed instead of cat because bash on DJGPP gets confused if
+  # it finds mixed CR/LF and LF-only lines.  Since sed operates in
+  # text mode, it properly converts lines to CR/LF.  This bash problem
+  # is reportedly fixed, but why not run on old versions too?
+  sed '$q' "$ltmain" >> "${ofile}T" || (rm -f "${ofile}T"; exit 1)
+
+  mv -f "${ofile}T" "$ofile" || \
+    (rm -f "$ofile" && cp "${ofile}T" "$ofile" && rm -f "${ofile}T")
+  chmod +x "$ofile"
+fi
+
+])# _LT_AC_LTCONFIG_HACK
+
+# AC_LIBTOOL_DLOPEN - enable checks for dlopen support
+#   The body only uses AC_BEFORE to request a warning when this macro is
+#   invoked after AC_LIBTOOL_SETUP; it sets no shell variables itself.
+#   Presumably AC_LIBTOOL_SETUP detects that this macro was called --
+#   confirm in its definition.
+AC_DEFUN([AC_LIBTOOL_DLOPEN], [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])])
+
+# AC_LIBTOOL_WIN32_DLL - declare package support for building win32 dll's
+#   The body only uses AC_BEFORE to request a warning when this macro is
+#   invoked after AC_LIBTOOL_SETUP; it sets no shell variables itself.
+#   Presumably AC_LIBTOOL_SETUP detects that this macro was called --
+#   confirm in its definition.
+AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [AC_BEFORE([$0], [AC_LIBTOOL_SETUP])])
+
+# AC_ENABLE_SHARED - implement the --enable-shared flag
+# Usage: AC_ENABLE_SHARED[(DEFAULT)]
+#   Where DEFAULT is either `yes' or `no'.  If omitted, it defaults to
+#   `yes'.
+#   The option value may be `yes', `no', or a list of package names
+#   separated by the current IFS characters, `:' or `,'.  In the list
+#   form enable_shared becomes `yes' only when the list contains the
+#   value of $PACKAGE (or the word `default' when PACKAGE is unset).
+#   When the option is not given, enable_shared is set to DEFAULT.
+AC_DEFUN([AC_ENABLE_SHARED],
+[define([AC_ENABLE_SHARED_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(shared,
+changequote(<<, >>)dnl
+<<  --enable-shared[=PKGS]  build shared libraries [default=>>AC_ENABLE_SHARED_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case $enableval in
+yes) enable_shared=yes ;;
+no) enable_shared=no ;;
+*)
+  enable_shared=no
+  # Look at the argument we got.  We use all the common list separators.
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+  for pkg in $enableval; do
+    if test "X$pkg" = "X$p"; then
+      enable_shared=yes
+    fi
+  done
+  IFS="$ac_save_ifs"
+  ;;
+esac],
+enable_shared=AC_ENABLE_SHARED_DEFAULT)dnl
+])
+
+# AC_DISABLE_SHARED - set the default shared flag to --disable-shared
+#   Expands AC_ENABLE_SHARED with DEFAULT `no'; AC_BEFORE requests a
+#   warning if it is used after AC_LIBTOOL_SETUP.
+AC_DEFUN([AC_DISABLE_SHARED],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_SHARED(no)])
+
+# AC_ENABLE_STATIC - implement the --enable-static flag
+# Usage: AC_ENABLE_STATIC[(DEFAULT)]
+#   Where DEFAULT is either `yes' or `no'.  If omitted, it defaults to
+#   `yes'.
+#   The option value may be `yes', `no', or a list of package names
+#   separated by the current IFS characters, `:' or `,'.  In the list
+#   form enable_static becomes `yes' only when the list contains the
+#   value of $PACKAGE (or the word `default' when PACKAGE is unset).
+#   When the option is not given, enable_static is set to DEFAULT.
+AC_DEFUN([AC_ENABLE_STATIC],
+[define([AC_ENABLE_STATIC_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(static,
+changequote(<<, >>)dnl
+<<  --enable-static[=PKGS]  build static libraries [default=>>AC_ENABLE_STATIC_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case $enableval in
+yes) enable_static=yes ;;
+no) enable_static=no ;;
+*)
+  enable_static=no
+  # Look at the argument we got.  We use all the common list separators.
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+  for pkg in $enableval; do
+    if test "X$pkg" = "X$p"; then
+      enable_static=yes
+    fi
+  done
+  IFS="$ac_save_ifs"
+  ;;
+esac],
+enable_static=AC_ENABLE_STATIC_DEFAULT)dnl
+])
+
+# AC_DISABLE_STATIC - set the default static flag to --disable-static
+#   Expands AC_ENABLE_STATIC with DEFAULT `no'; AC_BEFORE requests a
+#   warning if it is used after AC_LIBTOOL_SETUP.
+AC_DEFUN([AC_DISABLE_STATIC],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_STATIC(no)])
+
+
+# AC_ENABLE_FAST_INSTALL - implement the --enable-fast-install flag
+# Usage: AC_ENABLE_FAST_INSTALL[(DEFAULT)]
+#   Where DEFAULT is either `yes' or `no'.  If omitted, it defaults to
+#   `yes'.
+#   The option value may be `yes', `no', or a list of package names
+#   separated by the current IFS characters, `:' or `,'.  In the list
+#   form enable_fast_install becomes `yes' only when the list contains
+#   the value of $PACKAGE (or the word `default' when PACKAGE is unset).
+#   When the option is not given, enable_fast_install is set to DEFAULT.
+AC_DEFUN([AC_ENABLE_FAST_INSTALL],
+[define([AC_ENABLE_FAST_INSTALL_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(fast-install,
+changequote(<<, >>)dnl
+<<  --enable-fast-install[=PKGS]  optimize for fast installation [default=>>AC_ENABLE_FAST_INSTALL_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case $enableval in
+yes) enable_fast_install=yes ;;
+no) enable_fast_install=no ;;
+*)
+  enable_fast_install=no
+  # Look at the argument we got.  We use all the common list separators.
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+  for pkg in $enableval; do
+    if test "X$pkg" = "X$p"; then
+      enable_fast_install=yes
+    fi
+  done
+  IFS="$ac_save_ifs"
+  ;;
+esac],
+enable_fast_install=AC_ENABLE_FAST_INSTALL_DEFAULT)dnl
+])
+
+# AC_DISABLE_FAST_INSTALL - set the default to --disable-fast-install
+#   Expands AC_ENABLE_FAST_INSTALL with DEFAULT `no'; AC_BEFORE requests
+#   a warning if it is used after AC_LIBTOOL_SETUP.
+AC_DEFUN([AC_DISABLE_FAST_INSTALL],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_FAST_INSTALL(no)])
+
+# AC_LIBTOOL_PICMODE - implement the --with-pic flag
+# Usage: AC_LIBTOOL_PICMODE[(MODE)]
+#   Where MODE is either `yes' or `no'.  The shell variable pic_mode is
+#   set to MODE when exactly one argument is given; otherwise it is set
+#   to the literal word `default'.
+#   AC_BEFORE requests a warning if this is used after AC_LIBTOOL_SETUP.
+AC_DEFUN([AC_LIBTOOL_PICMODE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+pic_mode=ifelse($#,1,$1,default)])
+
+
+# AC_PATH_TOOL_PREFIX - find a file program which can recognise shared library
+# Usage: AC_PATH_TOOL_PREFIX(PROGRAM[, SEARCH-PATH])
+#   A $MAGIC_CMD that is already an absolute path (`/...' or a DOS-style
+#   `X:/...') is taken as is.  Otherwise the colon-separated SEARCH-PATH
+#   (default: $PATH) is scanned and the first regular file named PROGRAM
+#   is stored in the cache variable lt_cv_path_MAGIC_CMD.
+#   If file_magic_test_file is non-empty and deplibs_check_method has the
+#   form `file_magic REGEX', file_magic_cmd is run on the test file with
+#   the candidate as MAGIC_CMD, and a warning is written to stderr when
+#   the output does not match REGEX; the candidate is kept either way.
+#   Finally MAGIC_CMD is set from the cache variable and the result (or
+#   `no') is reported.
+AC_DEFUN([AC_PATH_TOOL_PREFIX],
+[AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
+[case $MAGIC_CMD in
+  /*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+  ?:/*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a dos path.
+  ;;
+  *)
+  ac_save_MAGIC_CMD="$MAGIC_CMD"
+  IFS="${IFS=   }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word.  This closes a longstanding sh security hole.
+  ac_dummy="ifelse([$2], , $PATH, [$2])"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$1; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/$1"
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           egrep "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  MAGIC_CMD="$ac_save_MAGIC_CMD"
+  ;;
+esac])
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  AC_MSG_RESULT($MAGIC_CMD)
+else
+  AC_MSG_RESULT(no)
+fi
+])
+
+
+# AC_PATH_MAGIC - find a file program which can recognise a shared library
+#   First looks for ${ac_tool_prefix}file in /usr/bin followed by $PATH.
+#   If nothing was cached and ac_tool_prefix is non-empty, the search is
+#   repeated for the unprefixed name `file'.  If nothing was cached and
+#   there is no tool prefix, MAGIC_CMD is set to the no-op command `:'.
+AC_DEFUN([AC_PATH_MAGIC],
+[AC_REQUIRE([AC_CHECK_TOOL_PREFIX])dnl
+AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin:$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    AC_PATH_TOOL_PREFIX(file, /usr/bin:$PATH)
+  else
+    MAGIC_CMD=:
+  fi
+fi
+])
+
+
+# AC_PROG_LD - find the path to the GNU or non-GNU linker
+#   Adds the --with-gnu-ld option; with_gnu_ld is `no' when the option
+#   is absent or given the value `no', and `yes' otherwise.
+#   When $GCC is `yes', the linker name is taken from
+#   `$CC -print-prog-name=ld': an absolute path is canonicalised
+#   (backslashes become slashes, `/DIR/../' elements are collapsed) and
+#   used as LD unless LD is already set; an empty answer falls back to
+#   plain `ld'; any other (relative) name sets with_gnu_ld=unknown and is
+#   looked up in $PATH.
+#   If LD is still empty, $PATH is searched for the program and the
+#   search stops at the first match whose `-v' output agrees with the
+#   with_gnu_ld preference; each candidate is stored in lt_cv_path_LD
+#   before it is tested, so the last one examined stays cached when none
+#   agrees.  A non-empty LD is cached unchanged.
+#   Aborts configure when no ld is found, then expands AC_PROG_LD_GNU.
+AC_DEFUN([AC_PROG_LD],
+[AC_ARG_WITH(gnu-ld,
+[  --with-gnu-ld           assume the C compiler uses GNU ld [default=no]],
+test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no)
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  AC_MSG_CHECKING([for ld used by GCC])
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [[\\/]]* | [[A-Za-z]]:[[\\/]]*)
+      re_direlt='/[[^/]][[^/]]*/\.\./'
+      # Canonicalize the path of ld
+      ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'`
+      while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+       ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  AC_MSG_CHECKING([for GNU ld])
+else
+  AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(lt_cv_path_LD,
+[if test -z "$LD"; then
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some GNU ld's only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      if "$lt_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
+       test "$with_gnu_ld" != no && break
+      else
+       test "$with_gnu_ld" != yes && break
+      fi
+    fi
+  done
+  IFS="$ac_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi])
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  AC_MSG_RESULT($LD)
+else
+  AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+AC_PROG_LD_GNU
+])
+
+# AC_PROG_LD_GNU - determine whether $LD is GNU ld
+#   Runs `$LD -v' and caches `yes' in lt_cv_prog_gnu_ld when the output
+#   contains `GNU' or `with BFD', `no' otherwise.  The matching lines are
+#   sent to file descriptor 5.  with_gnu_ld is then overwritten with the
+#   cached answer.
+AC_DEFUN([AC_PROG_LD_GNU],
+[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU ld's only accept -v.
+if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
+  lt_cv_prog_gnu_ld=yes
+else
+  lt_cv_prog_gnu_ld=no
+fi])
+with_gnu_ld=$lt_cv_prog_gnu_ld
+])
+
+# AC_PROG_LD_RELOAD_FLAG - find reload flag for linker
+#   -- PORTME Some linkers may need a different reload flag.
+#   No probing is done: the cache variable lt_cv_ld_reload_flag is simply
+#   given the value `-r' unless it is already cached.  reload_flag is set
+#   from it and, when non-empty, gets a single leading space prepended.
+AC_DEFUN([AC_PROG_LD_RELOAD_FLAG],
+[AC_CACHE_CHECK([for $LD option to reload object files], lt_cv_ld_reload_flag,
+[lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+test -n "$reload_flag" && reload_flag=" $reload_flag"
+])
+
+# AC_DEPLIBS_CHECK_METHOD - how to check for library dependencies
+#  -- PORTME fill in with the dynamic library characteristics
+#   Chooses, per $host_os (refined on some systems by $host_cpu,
+#   $host_vendor, $LD or a preprocessor probe with $CC), the method that
+#   is cached in lt_cv_deplibs_check_method; hosts that match no case
+#   keep the initial value `unknown'.
+#   After the cached commands, file_magic_cmd and deplibs_check_method
+#   are copied from the corresponding lt_cv_* variables.
+#   lt_cv_file_magic_test_file is assigned only inside the cached
+#   commands and is not copied out by this macro.
+#   NOTE(review): the netbsd case selects a `match_pattern REGEX' method
+#   that the list of methods in the body does not describe -- presumably
+#   it is interpreted by ltmain.sh; confirm.
+AC_DEFUN([AC_DEPLIBS_CHECK_METHOD],
+[AC_CACHE_CHECK([how to recognise dependent libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given egrep regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix4* | aix5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi4*)
+  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin* | mingw* | pw32*)
+  lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method='file_magic Mach-O dynamically linked shared library'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  case "$host_os" in
+  rhapsody* | darwin1.[[012]])
+    lt_cv_file_magic_test_file=`echo /System/Library/Frameworks/System.framework/Versions/*/System | head -1`
+    ;;
+  *) # Darwin 1.3 on
+    lt_cv_file_magic_test_file='/usr/lib/libSystem.dylib'
+    ;;
+  esac
+  ;;
+
+freebsd*)
+  if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
+      lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[[3-9]]86 (compact )?demand paged shared library'
+      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+      ;;
+    esac
+  else
+    lt_cv_deplibs_check_method=pass_all
+  fi
+  ;;
+
+gnu*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+hpux10.20*|hpux11*)
+  lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]].[[0-9]]) shared library'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libc.sl
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+  irix5* | nonstopux*)
+    # this will be overridden with pass_all, but let us keep it just in case
+    lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1"
+    ;;
+  *)
+    case $LD in
+    *-32|*"-32 ") libmagic=32-bit;;
+    *-n32|*"-n32 ") libmagic=N32;;
+    *-64|*"-64 ") libmagic=64-bit;;
+    *) libmagic=never-match;;
+    esac
+    # this will be overridden with pass_all, but let us keep it just in case
+    lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[[1234]] dynamic lib MIPS - version 1"
+    ;;
+  esac
+  lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*`
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+  case $host_cpu in
+  alpha* | hppa* | i*86 | mips | mipsel | powerpc* | sparc* | ia64* | s390* | x86_64*)
+    lt_cv_deplibs_check_method=pass_all ;;
+  *)
+    # glibc up to 2.1.1 does not perform some relocations on ARM
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' ;;
+  esac
+  lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so`
+  ;;
+
+netbsd*)
+  if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so\.[[0-9]]+\.[[0-9]]+$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so$'
+  fi
+  ;;
+
+newos6*)
+  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libnls.so
+  ;;
+
+openbsd*)
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB shared object'
+  else
+    lt_cv_deplibs_check_method='file_magic OpenBSD.* shared library'
+  fi
+  ;;
+
+osf3* | osf4* | osf5*)
+  # this will be overridden with pass_all, but let us keep it just in case
+  lt_cv_deplibs_check_method='file_magic COFF format alpha shared library'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sco3.2v5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+solaris*)
+  lt_cv_deplibs_check_method=pass_all
+  lt_cv_file_magic_test_file=/lib/libc.so
+  ;;
+
+sysv5uw[[78]]* | sysv4*uw2*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+  case $host_vendor in
+  motorola)
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]'
+    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+    ;;
+  ncr)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  sequent)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )'
+    ;;
+  sni)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib"
+    lt_cv_file_magic_test_file=/lib/libc.so
+    ;;
+  siemens)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  esac
+  ;;
+esac
+])
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+])
+
+
+# AC_PROG_NM - find the path to a BSD-compatible name lister
+#   A non-empty $NM supplied by the user is used unchanged.  Otherwise
+#   each directory of $PATH, followed by /usr/ccs/bin, /usr/ucb and /bin,
+#   is checked for ${ac_tool_prefix}nm.  The search stops at the first
+#   candidate that accepts `-B' or, failing that, `-p' when run on
+#   /dev/null, and that flag becomes part of the result.  A candidate
+#   accepting neither is remembered only as a fallback (the first such
+#   one is kept) while the search continues.  If nothing at all is found
+#   the result is plain `nm'.
+#   The result is cached in lt_cv_path_NM and assigned to NM.
+AC_DEFUN([AC_PROG_NM],
+[AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl
+AC_MSG_CHECKING([for BSD-compatible nm])
+AC_CACHE_VAL(lt_cv_path_NM,
+[if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do
+    test -z "$ac_dir" && ac_dir=.
+    tmp_nm=$ac_dir/${ac_tool_prefix}nm
+    if test -f $tmp_nm || test -f $tmp_nm$ac_exeext ; then
+      # Check to see if the nm accepts a BSD-compat flag.
+      # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+      #   nm: unknown option "B" ignored
+      # Tru64's nm complains that /dev/null is an invalid object file
+      if ($tmp_nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep '(/dev/null|Invalid file or object type)' >/dev/null; then
+       lt_cv_path_NM="$tmp_nm -B"
+       break
+      elif ($tmp_nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+       lt_cv_path_NM="$tmp_nm -p"
+       break
+      else
+       lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+       continue # so that we can try to find one that supports BSD flags
+      fi
+    fi
+  done
+  IFS="$ac_save_ifs"
+  test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm
+fi])
+NM="$lt_cv_path_NM"
+AC_MSG_RESULT([$NM])
+])
+
+# AC_CHECK_LIBM - check for math library
+#   Sets LIBM to the linker flags for the math library, starting from an
+#   empty value.  On beos, cygwin and pw32 hosts LIBM stays empty.  On
+#   *-ncr-sysv4.3* it becomes `-lmw' if _mwvalidcheckl is found in
+#   libmw, with ` -lm' appended if libm links.  Elsewhere it becomes
+#   `-lm' if libm links.
+AC_DEFUN([AC_CHECK_LIBM],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+LIBM=
+case $host in
+*-*-beos* | *-*-cygwin* | *-*-pw32*)
+  # These systems don't have libm
+  ;;
+*-ncr-sysv4.3*)
+  AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
+  AC_CHECK_LIB(m, main, LIBM="$LIBM -lm")
+  ;;
+*)
+  AC_CHECK_LIB(m, main, LIBM="-lm")
+  ;;
+esac
+])
+
+# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for
+# the libltdl convenience library and LTDLINCL to the include flags for
+# the libltdl header and adds --enable-ltdl-convenience to the
+# configure arguments.  Note that LIBLTDL and LTDLINCL are not
+# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called.  If DIR is not
+# provided, it is assumed to be `libltdl'.  LIBLTDL will be prefixed
+# with '${top_builddir}/' and LTDLINCL will be prefixed with
+# '${top_srcdir}/' (note the single quotes!).  If your package is not
+# flat and you're not using automake, define top_builddir and
+# top_srcdir appropriately in the Makefiles.
+# Configure aborts with an error when enable_ltdl_convenience is `no';
+# the flag is appended to ac_configure_args only when that variable is
+# empty.  INCLTDL is also set, to the same value as LTDLINCL.
+AC_DEFUN([AC_LIBLTDL_CONVENIENCE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+  case $enable_ltdl_convenience in
+  no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
+  "") enable_ltdl_convenience=yes
+      ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
+  esac
+  LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la
+  LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+  # For backwards non-gettext consistent compatibility...
+  INCLTDL="$LTDLINCL"
+])
+
+# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for
+# the libltdl installable library and LTDLINCL to the include flags for
+# the libltdl header and adds --enable-ltdl-install to the configure
+# arguments.  Note that LIBLTDL and LTDLINCL are not AC_SUBSTed, nor is
+# AC_CONFIG_SUBDIRS called.  If DIR is not provided and an installed
+# libltdl is not found, it is assumed to be `libltdl'.  LIBLTDL will
+# be prefixed with '${top_builddir}/' and LTDLINCL will be prefixed
+# with '${top_srcdir}/' (note the single quotes!).  If your package is
+# not flat and you're not using automake, define top_builddir and
+# top_srcdir appropriately in the Makefiles.
+# In the future, this macro may have to be called after AC_PROG_LIBTOOL.
+# Decision logic: if -lltdl links, enable_ltdl_install becomes `no'
+# unless it is already `yes'.  If it does not link, a warning is issued
+# when enable_ltdl_install is `no', otherwise it becomes `yes'.  When
+# the final value is not `yes', LIBLTDL is `-lltdl', LTDLINCL is empty
+# and --enable-ltdl-install=no is appended to the configure arguments.
+# INCLTDL is also set, to the same value as LTDLINCL.
+AC_DEFUN([AC_LIBLTDL_INSTALLABLE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+  AC_CHECK_LIB(ltdl, main,
+  [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no],
+  [if test x"$enable_ltdl_install" = xno; then
+     AC_MSG_WARN([libltdl not installed, but installation disabled])
+   else
+     enable_ltdl_install=yes
+   fi
+  ])
+  if test x"$enable_ltdl_install" = x"yes"; then
+    ac_configure_args="$ac_configure_args --enable-ltdl-install"
+    LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la
+    LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+  else
+    ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
+    LIBLTDL="-lltdl"
+    LTDLINCL=
+  fi
+  # For backwards non-gettext consistent compatibility...
+  INCLTDL="$LTDLINCL"
+])
+
+# old names
+# Each AM_* macro below simply expands to the AC_* macro of the same
+# name; the ENABLE/DISABLE variants forward their arguments unchanged.
+AC_DEFUN([AM_PROG_LIBTOOL],   [AC_PROG_LIBTOOL])
+AC_DEFUN([AM_ENABLE_SHARED],  [AC_ENABLE_SHARED($@)])
+AC_DEFUN([AM_ENABLE_STATIC],  [AC_ENABLE_STATIC($@)])
+AC_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
+AC_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
+AC_DEFUN([AM_PROG_LD],        [AC_PROG_LD])
+AC_DEFUN([AM_PROG_NM],        [AC_PROG_NM])
+
+# This is just to silence aclocal about the macro not being used
+# (the one-argument ifelse only mentions the macro name).
+ifelse([AC_DISABLE_FAST_INSTALL])
+
+# NOTE: This macro has been submitted for inclusion into   #
+#  GNU Autoconf as AC_PROG_SED.  When it is available in   #
+#  a released version of Autoconf we should remove this    #
+#  macro and use it instead.                               #
+# LT_AC_PROG_SED
+# --------------
+# Check for a fully-functional sed program, that truncates
+# as few characters as possible.  Prefer GNU sed if found.
+#
+# Summary of the logic below:
+#  - Candidates are every `sed' and `gsed' found in $PATH, followed by
+#    /usr/xpg4/bin/sed.
+#  - A candidate whose --version output mentions GNU is chosen at once.
+#  - Otherwise each candidate is fed a line that doubles in length on
+#    every round (built from a 10-character seed; the rounds stop once
+#    _count exceeds 10) and must reproduce it unchanged; the candidate
+#    that survives the most rounds wins.
+#  - The search result is cached in lt_cv_path_SED; a non-empty SED
+#    already present in the environment takes precedence over it.
+# NOTE(review): a candidate that is not a regular file ends the scan
+# (`break') instead of being skipped; confirm that this is intended.
+AC_DEFUN([LT_AC_PROG_SED],
+[AC_MSG_CHECKING([for a sed that does not truncate output])
+AC_CACHE_VAL(lt_cv_path_SED,
+[# Loop through the user's path and test for sed and gsed.
+# Then use that list of sed's as ones to test for truncation.
+as_executable_p="test -f"
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_prog in sed gsed; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+        _sed_list="$_sed_list $as_dir/$ac_prog$ac_exec_ext"
+      fi
+    done
+  done
+done
+
+  # Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+  trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+  trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+
+# Create a (secure) tmp directory for tmp files.
+: ${TMPDIR=/tmp}
+{
+  tmp=`(umask 077 && mktemp -d -q "$TMPDIR/sedXXXXXX") 2>/dev/null` &&
+  test -n "$tmp" && test -d "$tmp"
+}  ||
+{
+  tmp=$TMPDIR/sed$$-$RANDOM
+  (umask 077 && mkdir $tmp)
+} ||
+{
+   echo "$me: cannot create a temporary directory in $TMPDIR" >&2
+   { (exit 1); exit 1; }
+}
+  _max=0
+  _count=0
+  # Add /usr/xpg4/bin/sed as it is typically found on Solaris
+  # along with /bin/sed that truncates output.
+  for _sed in $_sed_list /usr/xpg4/bin/sed; do
+    test ! -f ${_sed} && break
+    cat /dev/null > "$tmp/sed.in"
+    _count=0
+    echo ${ECHO_N-$ac_n} "0123456789${ECHO_C-$ac_c}" >"$tmp/sed.in"
+    # Check for GNU sed and select it if it is found.
+    if "${_sed}" --version 2>&1 < /dev/null | egrep '(GNU)' > /dev/null; then
+      lt_cv_path_SED=${_sed}
+      break
+    fi
+    while true; do
+      cat "$tmp/sed.in" "$tmp/sed.in" >"$tmp/sed.tmp"
+      mv "$tmp/sed.tmp" "$tmp/sed.in"
+      cp "$tmp/sed.in" "$tmp/sed.nl"
+      echo >>"$tmp/sed.nl"
+      ${_sed} -e 's/a$//' < "$tmp/sed.nl" >"$tmp/sed.out" || break
+      cmp -s "$tmp/sed.out" "$tmp/sed.nl" || break
+      # 40000 chars as input seems more than enough
+      test $_count -gt 10 && break
+      _count=`expr $_count + 1`
+      if test $_count -gt $_max; then
+        _max=$_count
+        lt_cv_path_SED=$_sed
+      fi
+    done
+  done
+  rm -rf "$tmp"
+])
+if test "X$SED" != "X"; then
+  lt_cv_path_SED=$SED
+else
+  SED=$lt_cv_path_SED
+fi
+AC_MSG_RESULT([$SED])
+])
+
diff --git a/altui/README.alt b/altui/README.alt
new file mode 100644 (file)
index 0000000..e0f31db
--- /dev/null
@@ -0,0 +1,71 @@
+Here is an alternate command-line user interface for the IJG JPEG software.
+It is designed for use under MS-DOS, and may also be useful on other non-Unix
+operating systems.  (For that matter, this code works fine on Unix, but the
+standard command-line syntax is better on Unix because it is pipe-friendly.)
+
+With this user interface, cjpeg and djpeg accept multiple input file names
+on the command line; output file names are generated by substituting
+appropriate extensions.  The user is prompted before any already-existing
+file will be overwritten.  See usage.alt for details.
+
+Expansion of wild-card file specifications is useful but is not directly
+provided by this code.  Most DOS C compilers have the ability to do wild-card
+expansion "behind the scenes", and we rely on that feature.  On other systems,
+the shell may do it for you, as is done on Unix.
+
+Also, a DOS-specific routine is provided to determine available memory;
+this makes the -maxmemory switch unnecessary except in unusual cases.
+If you know how to determine available memory on a different system,
+you can easily add the necessary code.  (And please send it along to
+jpeg-info@uunet.uu.net so we can include it in future releases!)
+
+
+INSTALLATION
+============
+
+You need to have the main IJG JPEG distribution, release 6 or later.
+Replace the standard cjpeg.c and djpeg.c files with the ones provided here.
+Then build the software as described in the main distribution's install.doc
+file, with these exceptions:
+
+* Define PROGRESS_REPORT in jconfig.h if you want the percent-done display.
+* Define NO_OVERWRITE_CHECK if you *don't* want overwrite confirmation.
+* You may ignore the USE_SETMODE and TWO_FILE_COMMANDLINE symbols discussed
+  in install.doc; these files do not use them.
+* As given, djpeg.c defaults to GIF output (not PPM output as in the standard
+  djpeg.c).  If you want something different, modify DEFAULT_FMT.
+
+You may also need to do something special to enable filename wild-card
+expansion, assuming your compiler has that capability at all.
+
+Modify the standard usage.doc file as described in usage.alt.  (If you want
+to use the Unix-style manual pages cjpeg.1 and djpeg.1, better fix them too.)
+
+
+Here are some specific notes for popular MS-DOS compilers:
+
+Borland C:
+  Add "-DMSDOS" to CFLAGS to enable use of the DOS memory determination code.
+  Link with the standard library file WILDARGS.OBJ to get wild-card expansion.
+
+Microsoft C:
+  Add "-DMSDOS" to CFLAGS to enable use of the DOS memory determination code.
+  Link with the standard library file SETARGV.OBJ to get wild-card expansion.
+  In the versions I've used, you must also add /NOE to the linker switches to
+  avoid a duplicate-symbol error from including SETARGV.
+
+DJGPP (we recommend version 2.0 or later):
+  Add "-DFREE_MEM_ESTIMATE=0" to CFLAGS.  Wild-card expansion is automatic.
+
+
+LEGAL ISSUES
+============
+
+This software is copyright (C) 1991-1998, Thomas G. Lane.
+Terms of distribution and use are the same as for the free IJG JPEG software;
+see its README file for details.
+
+The authors make NO WARRANTY or representation, either express or implied,
+with respect to this software, its quality, accuracy, merchantability, or
+fitness for a particular purpose.  This software is provided "AS IS", and you,
+its user, assume the entire risk as to its quality and accuracy.
diff --git a/altui/cjpeg.c b/altui/cjpeg.c
new file mode 100644 (file)
index 0000000..df1a4f8
--- /dev/null
@@ -0,0 +1,813 @@
+/*
+ * alternate cjpeg.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 6, 2006
+ * ---------------------------------------------------------------------
+ *
+ * This file contains an alternate user interface for the JPEG compressor.
+ * One or more input files are named on the command line, and output file
+ * names are created by substituting ".jpg" for the input file's extension.
+ */
+
+#include "cdjpeg.h"            /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"          /* for version message */
+
+#ifdef USE_CCOMMAND            /* command-line reader for Macintosh */
+#ifdef __MWERKS__
+#include <SIOUX.h>              /* Metrowerks needs this */
+#include <console.h>           /* ... and this */
+#endif
+#ifdef THINK_C
+#include <console.h>           /* Think declares it here */
+#endif
+#endif
+
+#ifndef PATH_MAX               /* ANSI maximum-pathname-length constant */
+#define PATH_MAX 256
+#endif
+
+
+/* Create the add-on message string table. */
+/* JMESSAGE is defined so that every JMESSAGE(code,string) entry pulled in
+ * from cderror.h contributes only its string, followed by a comma; the
+ * trailing NULL closes the array.  (Presumably cderror.h consists of such
+ * entries -- confirm there.)
+ */
+
+#define JMESSAGE(code,string)  string ,
+
+static const char * const cdjpeg_message_table[] = {
+#include "cderror.h"
+  NULL
+};
+
+
+/*
+ * SIMD Ext: compiler-specific hacks to enable filename wild-card expansion
+ */
+
+#ifdef _MSC_VER                /* Microsoft Visual C++ */
+/* Wild-card expansion hook, taken from setargv.c (setargv.obj):
+ * _setargv is defined here so that it simply forwards to __setargv.
+ * Tested under Visual C++ V6.0, Toolkit 2003, and 2005 Express Edition.
+ */
+int __cdecl
+_setargv (void)
+{
+  int __cdecl __setargv(void); /* block-scope prototype of the real worker */
+
+  return __setargv();
+}
+#endif
+#ifdef __BORLANDC__    /* Borland C++ */
+/* from wildargs.c (wildargs.obj) */
+/* Tested under Borland C++ Compiler 5.5 (win32) */
+/* Defines _argv_expand_ptr and points it at _expand_wild.  Presumably the
+ * Borland startup code calls through this pointer to expand wild-cards in
+ * argv -- confirm against the Borland run-time library sources.
+ */
+#include <wildargs.h>
+typedef void _RTLENTRY (* _RTLENTRY _argv_expand_fnc)(char *, _PFN_ADDARG);
+_argv_expand_fnc _argv_expand_ptr = _expand_wild;
+#endif
+
+
+/*
+ * Automatic determination of available memory.
+ */
+
+static long default_maxmem;    /* saves value determined at startup, or 0 */
+
+#ifndef FREE_MEM_ESTIMATE      /* may be defined from command line */
+
+#ifdef MSDOS                   /* For MS-DOS (unless flat-memory model) */
+
+#include <dos.h>               /* for access to intdos() call */
+
+LOCAL(long)
+unused_dos_memory (void)
+/* Report how many bytes of DOS memory are currently unallocated */
+{
+  union REGS dosregs;
+  long free_paragraphs;
+
+  /* Ask DOS function 48h (Allocate Memory Block) for more paragraphs than
+   * DOS can ever have.  The request fails, and BX comes back holding the
+   * number of paragraphs that are actually available.
+   */
+  dosregs.x.bx = 0xFFFF;
+  dosregs.h.ah = 0x48;
+  (void) intdos(&dosregs, &dosregs);
+  free_paragraphs = (unsigned int) dosregs.x.bx;
+
+  /* One paragraph is 16 bytes. */
+  return free_paragraphs << 4;
+}
+
+/* The default memory setting is 95% of the available space. */
+#define FREE_MEM_ESTIMATE  ((unused_dos_memory() * 95L) / 100L)
+
+#endif /* MSDOS */
+
+#ifdef ATARI                   /* For Atari ST/STE/TT, Pure C or Turbo C */
+
+#include <ext.h>
+
+/* The default memory setting is 90% of the available space. */
+#define FREE_MEM_ESTIMATE  (((long) coreleft() * 90L) / 100L)
+
+#endif /* ATARI */
+
+/* Add memory-estimation procedures for other operating systems here,
+ * with appropriate #ifdef's around them.
+ */
+
+#endif /* !FREE_MEM_ESTIMATE */
+
+
+/*
+ * This routine determines what format the input file is,
+ * and selects the appropriate input-reading module.
+ *
+ * To determine which family of input formats the file belongs to,
+ * we may look only at the first byte of the file, since C does not
+ * guarantee that more than one character can be pushed back with ungetc.
+ * Looking at additional bytes would require one of these approaches:
+ *     1) assume we can fseek() the input file (fails for piped input);
+ *     2) assume we can push back more than one character (works in
+ *        some C implementations, but unportable);
+ *     3) provide our own buffering (breaks input readers that want to use
+ *        stdio directly, such as the RLE library);
+ * or  4) don't put back the data, and modify the input_init methods to assume
+ *        they start reading after the start of file (also breaks RLE library).
+ * #1 is attractive for MS-DOS but is untenable on Unix.
+ *
+ * The most portable solution for file types that can't be identified by their
+ * first byte is to make the user tell us what they are.  This is also the
+ * only approach for "raw" file types that contain only arbitrary values.
+ * We presently apply this method for Targa files.  Most of the time Targa
+ * files start with 0x00, so we recognize that case.  Potentially, however,
+ * a Targa file could start with any byte value (byte 0 is the length of the
+ * seldom-used ID field), so we provide a switch to force Targa input mode.
+ */
+
+static boolean is_targa;       /* records user -targa switch */
+
+
+LOCAL(cjpeg_source_ptr)
+select_file_type (j_compress_ptr cinfo, FILE * infile)
+/* Pick the input-reading module for infile and return its source object.
+ * The choice is made from the -targa switch or, failing that, from the
+ * first byte of the file, which is pushed back after being examined.
+ */
+{
+  int first_byte;
+
+  /* An explicit -targa switch takes priority over looking at the data. */
+  if (is_targa) {
+#ifdef TARGA_SUPPORTED
+    return jinit_read_targa(cinfo);
+#else
+    ERREXIT(cinfo, JERR_TGA_NOTCOMP);
+#endif
+  }
+
+  /* Peek at one byte only; see the discussion above for why. */
+  first_byte = getc(infile);
+  if (first_byte == EOF)
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+  if (ungetc(first_byte, infile) == EOF)
+    ERREXIT(cinfo, JERR_UNGETC_FAILED);
+
+  /* Dispatch on the signature byte of each compiled-in format. */
+#ifdef BMP_SUPPORTED
+  if (first_byte == 'B')
+    return jinit_read_bmp(cinfo);
+#endif
+#ifdef GIF_SUPPORTED
+  if (first_byte == 'G')
+    return jinit_read_gif(cinfo);
+#endif
+#ifdef PPM_SUPPORTED
+  if (first_byte == 'P')
+    return jinit_read_ppm(cinfo);
+#endif
+#ifdef RLE_SUPPORTED
+  if (first_byte == 'R')
+    return jinit_read_rle(cinfo);
+#endif
+#ifdef TARGA_SUPPORTED
+  if (first_byte == 0x00)
+    return jinit_read_targa(cinfo);
+#endif
+
+  /* Nothing matched. */
+  ERREXIT(cinfo, JERR_UNKNOWN_FORMAT);
+
+  return NULL;                 /* suppress compiler warnings */
+}
+
+
+/*
+ * Argument-parsing code.
+ * The switch parser is designed to be useful with DOS-style command line
+ * syntax, i.e., intermixed switches and file names, where only the switches
+ * to the left of a given file name affect processing of that file.
+ */
+
+
+static const char * progname;  /* program name for error messages */
+static char * outfilename;     /* for -outfile switch */
+
+
+LOCAL(void)
+usage (void)
+/* Print the command-line summary on stderr, then exit with EXIT_FAILURE */
+{
+  FILE * out = stderr;
+
+  /* General synopsis */
+  fprintf(out, "usage: %s [switches] inputfile(s)\n", progname);
+  fputs("List of input files may use wildcards (* and ?)\n", out);
+  fputs("Output filename is same as input filename, but extension .jpg\n", out);
+
+  /* Everyday switches */
+  fputs("Switches (names may be abbreviated):\n", out);
+  fputs("  -quality N     Compression quality (0..100; 5-95 is useful range)\n", out);
+  fputs("  -grayscale     Create monochrome JPEG file\n", out);
+#ifdef ENTROPY_OPT_SUPPORTED
+  fputs("  -optimize      Optimize Huffman table (smaller file, but slow compression)\n", out);
+#endif
+#ifdef C_PROGRESSIVE_SUPPORTED
+  fputs("  -progressive   Create progressive JPEG file\n", out);
+#endif
+#ifdef TARGA_SUPPORTED
+  fputs("  -targa         Input file is Targa format (usually not needed)\n", out);
+#endif
+
+  /* Advanced switches; the compiled-in default DCT method is flagged */
+  fputs("Switches for advanced users:\n", out);
+#ifdef DCT_ISLOW_SUPPORTED
+  fprintf(out, "  -dct int       Use integer DCT method%s\n",
+         (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  fprintf(out, "  -dct fast      Use fast integer DCT (less accurate)%s\n",
+         (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  fprintf(out, "  -dct float     Use floating-point DCT method%s\n",
+         (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+#endif
+  fputs("  -restart N     Set restart interval in rows, or in blocks with B\n", out);
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  fputs("  -smooth N      Smooth dithered input (N=1..100 is strength)\n", out);
+#endif
+#ifndef FREE_MEM_ESTIMATE
+  fputs("  -maxmemory N   Maximum memory to use (in kbytes)\n", out);
+#endif
+  fputs("  -outfile name  Specify name for output file\n", out);
+  fputs("  -verbose  or  -debug   Emit debug output\n", out);
+
+  /* Wizard switches */
+  fputs("Switches for wizards:\n", out);
+#ifdef C_ARITH_CODING_SUPPORTED
+  fputs("  -arithmetic    Use arithmetic coding\n", out);
+#endif
+  fputs("  -baseline      Force baseline quantization tables\n", out);
+  fputs("  -qtables file  Use quantization tables given in file\n", out);
+  fputs("  -qslots N[,...]    Set component quantization tables\n", out);
+  fputs("  -sample HxV[,...]  Set component sampling factors\n", out);
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+  fputs("  -scans file    Create multi-scan JPEG per script file\n", out);
+#endif
+
+  exit(EXIT_FAILURE);
+}
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+LOCAL(void)
+print_simd_info (FILE * file, char * labelstr, unsigned int simd)
+/* Write one line to file: labelstr followed by a tag for every SIMD flag
+ * set in simd, or by " NONE" when simd equals JSIMD_NONE.
+ */
+{
+  const char * mmx_tag   = "";
+  const char * tdnow_tag = "";
+  const char * sse_tag   = "";
+  const char * sse2_tag  = "";
+  const char * none_tag  = "";
+
+  if (simd & JSIMD_MMX)
+    mmx_tag = " MMX";
+  if (simd & JSIMD_3DNOW)
+    tdnow_tag = " 3DNow!";
+  if (simd & JSIMD_SSE)
+    sse_tag = " SSE";
+  if (simd & JSIMD_SSE2)
+    sse2_tag = " SSE2";
+  if (simd == JSIMD_NONE)
+    none_tag = " NONE";
+
+  fprintf(file, "%s%s%s%s%s%s\n", labelstr,
+         mmx_tag, tdnow_tag, sse_tag, sse2_tag, none_tag);
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
+
+
+LOCAL(int)
+parse_switches (j_compress_ptr cinfo, int argc, char **argv,
+               int last_file_arg_seen, boolean for_real)
+/* Parse optional switches.
+ * Returns argv[] index of first file-name argument (== argc if none).
+ * Any file names with indexes <= last_file_arg_seen are ignored;
+ * they have presumably been processed in a previous iteration.
+ * (Pass 0 for last_file_arg_seen on the first or only iteration.)
+ * for_real is FALSE on the first (dummy) pass; we may skip any expensive
+ * processing.
+ *
+ * Side effects: the file-scope variables is_targa and outfilename are reset
+ * and then updated from the switches, and *cinfo is modified as the switches
+ * direct.  A malformed or unknown switch ends the program through usage()
+ * or exit().
+ */
+{
+  int argn;
+  char * arg;
+  int quality;                 /* -quality parameter */
+  int q_scale_factor;          /* scaling percentage for -qtables */
+  boolean force_baseline;
+  boolean simple_progressive;
+  char * qtablefile = NULL;    /* saves -qtables filename if any */
+  char * qslotsarg = NULL;     /* saves -qslots parm if any */
+  char * samplearg = NULL;     /* saves -sample parm if any */
+  char * scansarg = NULL;      /* saves -scans parm if any */
+
+  /* Set up default JPEG parameters. */
+  /* Note that default -quality level need not, and does not,
+   * match the default scaling for an explicit -qtables argument.
+   */
+  quality = 75;                        /* default -quality value */
+  q_scale_factor = 100;                /* default to no scaling for -qtables */
+  force_baseline = FALSE;      /* by default, allow 16-bit quantizers */
+  simple_progressive = FALSE;
+  is_targa = FALSE;
+  outfilename = NULL;
+  cinfo->err->trace_level = 0;
+  if (default_maxmem > 0)      /* override library's default value */
+    cinfo->mem->max_memory_to_use = default_maxmem;
+
+  /* Scan command line options, adjust parameters */
+
+  for (argn = 1; argn < argc; argn++) {
+    arg = argv[argn];
+    if (*arg != '-') {
+      /* Not a switch, must be a file name argument */
+      if (argn <= last_file_arg_seen) {
+       outfilename = NULL;     /* -outfile applies to just one input file */
+       continue;               /* ignore this name if previously processed */
+      }
+      break;                   /* else done parsing switches */
+    }
+    arg++;                     /* advance past switch marker character */
+
+    if (keymatch(arg, "arithmetic", 1)) {
+      /* Use arithmetic coding. */
+#ifdef C_ARITH_CODING_SUPPORTED
+      cinfo->arith_code = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, arithmetic coding not supported\n",
+             progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "baseline", 1)) {
+      /* Force baseline-compatible output (8-bit quantizer values). */
+      force_baseline = TRUE;
+
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+      /* SIMD Ext: each -noXXX switch below hands the matching JSIMD_* mask
+       * to jpeg_simd_mask().  Presumably this disables use of the named
+       * instruction set(s); see jpeg_simd_mask() for the exact semantics.
+       */
+    } else if (keymatch(arg, "nosimd" , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL);
+    } else if (keymatch(arg, "nommx"  , 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX);
+    } else if (keymatch(arg, "no3dnow", 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW);
+    } else if (keymatch(arg, "nosse"  , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE);
+    } else if (keymatch(arg, "nosse2" , 6)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2);
+#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */
+
+    } else if (keymatch(arg, "dct", 2)) {
+      /* Select DCT algorithm. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (keymatch(argv[argn], "int", 1)) {
+       cinfo->dct_method = JDCT_ISLOW;
+      } else if (keymatch(argv[argn], "fast", 2)) {
+       cinfo->dct_method = JDCT_IFAST;
+      } else if (keymatch(argv[argn], "float", 2)) {
+       cinfo->dct_method = JDCT_FLOAT;
+      } else
+       usage();
+
+    } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
+      /* Enable debug printouts. */
+      /* On first -d, print version identification */
+      /* The flag is static, so the banner appears once per program run even
+       * though this routine is called repeatedly.
+       */
+      static boolean printed_version = FALSE;
+
+      if (! printed_version) {
+       fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n",
+               JVERSION, JCOPYRIGHT);
+       fprintf(stderr,
+               "\nx86 SIMD extension for IJG JPEG library, version %s\n\n",
+               JPEG_SIMDEXT_VER_STR);
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+       print_simd_info(stderr, "SIMD instructions supported by the system :",
+                       jpeg_simd_support(NULL));
+
+       fprintf(stderr, "\n      === SIMD Operation Modes ===\n");
+#ifdef DCT_ISLOW_SUPPORTED
+       print_simd_info(stderr, "Accurate integer DCT  (-dct int)   :",
+                       jpeg_simd_forward_dct(cinfo, JDCT_ISLOW));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+       print_simd_info(stderr, "Fast integer DCT      (-dct fast)  :",
+                       jpeg_simd_forward_dct(cinfo, JDCT_IFAST));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+       print_simd_info(stderr, "Floating-point DCT    (-dct float) :",
+                       jpeg_simd_forward_dct(cinfo, JDCT_FLOAT));
+#endif
+       print_simd_info(stderr, "Downsampling (-sample 2x2 or 2x1)  :",
+                       jpeg_simd_downsampler(cinfo));
+       print_simd_info(stderr, "Colorspace conversion (RGB->YCbCr) :",
+                       jpeg_simd_color_converter(cinfo));
+       fprintf(stderr, "\n");
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
+       printed_version = TRUE;
+      }
+      cinfo->err->trace_level++;
+
+    } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) {
+      /* Force a monochrome JPEG file to be generated. */
+      jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
+
+    } else if (keymatch(arg, "maxmemory", 3)) {
+      /* Maximum memory in Kb (or Mb with 'm'). */
+      long lval;
+      char ch = 'x';
+
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      /* ch keeps its 'x' placeholder when no suffix follows the number. */
+      if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
+       usage();
+      /* Note the units are decimal: a factor of 1000, not 1024. */
+      if (ch == 'm' || ch == 'M')
+       lval *= 1000L;
+      cinfo->mem->max_memory_to_use = lval * 1000L;
+
+    } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
+      /* Enable entropy parm optimization. */
+#ifdef ENTROPY_OPT_SUPPORTED
+      cinfo->optimize_coding = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n",
+             progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "outfile", 4)) {
+      /* Set output file name. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      outfilename = argv[argn];        /* save it away for later use */
+
+    } else if (keymatch(arg, "progressive", 1)) {
+      /* Select simple progressive mode. */
+#ifdef C_PROGRESSIVE_SUPPORTED
+      simple_progressive = TRUE;
+      /* We must postpone execution until num_components is known. */
+#else
+      fprintf(stderr, "%s: sorry, progressive output was not compiled\n",
+             progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "quality", 1)) {
+      /* Quality factor (quantization table scaling factor). */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (sscanf(argv[argn], "%d", &quality) != 1)
+       usage();
+      /* Change scale factor in case -qtables is present. */
+      q_scale_factor = jpeg_quality_scaling(quality);
+
+    } else if (keymatch(arg, "qslots", 2)) {
+      /* Quantization table slot numbers. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      qslotsarg = argv[argn];
+      /* Must delay setting qslots until after we have processed any
+       * colorspace-determining switches, since jpeg_set_colorspace sets
+       * default quant table numbers.
+       */
+
+    } else if (keymatch(arg, "qtables", 2)) {
+      /* Quantization tables fetched from file. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      qtablefile = argv[argn];
+      /* We postpone actually reading the file in case -quality comes later. */
+
+    } else if (keymatch(arg, "restart", 1)) {
+      /* Restart interval in MCU rows (or in MCUs with 'b'). */
+      long lval;
+      char ch = 'x';
+
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
+       usage();
+      if (lval < 0 || lval > 65535L)
+       usage();
+      if (ch == 'b' || ch == 'B') {
+       cinfo->restart_interval = (unsigned int) lval;
+       cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
+      } else {
+       cinfo->restart_in_rows = (int) lval;
+       /* restart_interval will be computed during startup */
+      }
+
+    } else if (keymatch(arg, "sample", 2)) {
+      /* Set sampling factors. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      samplearg = argv[argn];
+      /* Must delay setting sample factors until after we have processed any
+       * colorspace-determining switches, since jpeg_set_colorspace sets
+       * default sampling factors.
+       */
+
+    } else if (keymatch(arg, "scans", 2)) {
+      /* Set scan script. */
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      scansarg = argv[argn];
+      /* We must postpone reading the file in case -progressive appears. */
+#else
+      fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n",
+             progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "smooth", 2)) {
+      /* Set input smoothing factor. */
+      int val;
+
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (sscanf(argv[argn], "%d", &val) != 1)
+       usage();
+      if (val < 0 || val > 100)
+       usage();
+      cinfo->smoothing_factor = val;
+
+    } else if (keymatch(arg, "targa", 1)) {
+      /* Input file is Targa format. */
+      is_targa = TRUE;
+
+    } else {
+      usage();                 /* bogus switch */
+    }
+  }
+
+  /* Post-switch-scanning cleanup */
+  /* Everything that was merely recorded above (quality, -qtables, -qslots,
+   * -sample, -progressive, -scans) is applied here, and only on the real
+   * pass.
+   */
+
+  if (for_real) {
+
+    /* Set quantization tables for selected quality. */
+    /* Some or all may be overridden if -qtables is present. */
+    jpeg_set_quality(cinfo, quality, force_baseline);
+
+    if (qtablefile != NULL)    /* process -qtables if it was present */
+      if (! read_quant_tables(cinfo, qtablefile,
+                             q_scale_factor, force_baseline))
+       usage();
+
+    if (qslotsarg != NULL)     /* process -qslots if it was present */
+      if (! set_quant_slots(cinfo, qslotsarg))
+       usage();
+
+    if (samplearg != NULL)     /* process -sample if it was present */
+      if (! set_sample_factors(cinfo, samplearg))
+       usage();
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+    if (simple_progressive)    /* process -progressive; -scans can override */
+      jpeg_simple_progression(cinfo);
+#endif
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+    if (scansarg != NULL)      /* process -scans if it was present */
+      if (! read_scan_script(cinfo, scansarg))
+       usage();
+#endif
+  }
+
+  return argn;                 /* return index of next arg (file name) */
+}
+
+
+/*
+ * Check for overwrite of an existing file; clear it with user
+ */
+
+#ifndef NO_OVERWRITE_CHECK
+
+LOCAL(boolean)
+is_write_ok (char * outfname)
+/* Decide whether outfname may be written.
+ * Returns TRUE if the file does not exist yet (it cannot be opened for
+ * reading) or if the user answers 'y'/'Y' to the overwrite prompt; returns
+ * FALSE if the user answers 'n'/'N'.  Any other answer repeats the prompt.
+ * If stdin reaches end-of-file (or fails) before an answer has been read,
+ * the answer is taken to be "no", so that the program neither loops forever
+ * nor clobbers a file without confirmation.
+ */
+{
+  FILE * ofile;
+  int ch;
+  int rest;
+
+  ofile = fopen(outfname, READ_BINARY);
+  if (ofile == NULL)
+    return TRUE;               /* not present */
+  fclose(ofile);               /* oops, it is present */
+
+  for (;;) {
+    fprintf(stderr, "%s already exists, overwrite it? [y/n] ",
+           outfname);
+    fflush(stderr);
+    ch = getc(stdin);
+    if (ch == EOF)
+      return FALSE;            /* nobody left to ask: refuse to overwrite */
+    if (ch != '\n') {          /* flush rest of line */
+      do {
+       rest = getc(stdin);
+      } while (rest != '\n' && rest != EOF);   /* EOF must end the flush too */
+    }
+
+    switch (ch) {
+    case 'Y':
+    case 'y':
+      return TRUE;
+    case 'N':
+    case 'n':
+      return FALSE;
+    default:
+      break;                   /* otherwise, ask again */
+    }
+  }
+}
+
+#endif
+
+
+/*
+ * Process a single input file name, and return its index in argv[].
+ * File names at or to left of old_file_index have been processed already.
+ */
+
+LOCAL(int)
+process_one_file (int argc, char **argv, int old_file_index)
+/* Compress the next input file named on the command line.
+ * argc/argv are the program's arguments; file names at or to the left of
+ * old_file_index have been handled already.  Returns the argv[] index of
+ * the file name processed by this call.  Failures to open the input, to
+ * derive or create the output, or a refused overwrite are reported on
+ * stderr (where applicable) and simply skip the file; a missing file name
+ * ends the program through usage().
+ */
+{
+  struct jpeg_compress_struct cinfo;
+  struct jpeg_error_mgr jerr;
+  char *infilename;
+  char workfilename[PATH_MAX];
+#ifdef PROGRESS_REPORT
+  struct cdjpeg_progress_mgr progress;
+#endif
+  int file_index;
+  cjpeg_source_ptr src_mgr;
+  FILE * input_file = NULL;
+  FILE * output_file = NULL;
+  JDIMENSION num_scanlines;
+
+  /* Initialize the JPEG compression object with default error handling. */
+  cinfo.err = jpeg_std_error(&jerr);
+  jpeg_create_compress(&cinfo);
+  /* Add some application-specific error messages (from cderror.h) */
+  jerr.addon_message_table = cdjpeg_message_table;
+  jerr.first_addon_message = JMSG_FIRSTADDONCODE;
+  jerr.last_addon_message = JMSG_LASTADDONCODE;
+
+  /* Now safe to enable signal catcher. */
+#ifdef NEED_SIGNAL_CATCHER
+  enable_signal_catcher((j_common_ptr) &cinfo);
+#endif
+
+  /* Initialize JPEG parameters.
+   * Much of this may be overridden later.
+   * In particular, we don't yet know the input file's color space,
+   * but we need to provide some value for jpeg_set_defaults() to work.
+   */
+
+  cinfo.in_color_space = JCS_RGB; /* arbitrary guess */
+  jpeg_set_defaults(&cinfo);
+
+  /* Scan command line to find next file name.
+   * It is convenient to use just one switch-parsing routine, but the switch
+   * values read here are ignored; we will rescan the switches after opening
+   * the input file.
+   */
+
+  file_index = parse_switches(&cinfo, argc, argv, old_file_index, FALSE);
+  if (file_index >= argc) {
+    fprintf(stderr, "%s: missing input file name\n", progname);
+    usage();
+  }
+
+  /* Open the input file. */
+  infilename = argv[file_index];
+  if ((input_file = fopen(infilename, READ_BINARY)) == NULL) {
+    fprintf(stderr, "%s: can't open %s\n", progname, infilename);
+    goto fail;
+  }
+
+#ifdef PROGRESS_REPORT
+  start_progress_monitor((j_common_ptr) &cinfo, &progress);
+#endif
+
+  /* Figure out the input file format, and set up to read it. */
+  src_mgr = select_file_type(&cinfo, input_file);
+  src_mgr->input_file = input_file;
+
+  /* Read the input file header to obtain file size & colorspace. */
+  (*src_mgr->start_input) (&cinfo, src_mgr);
+
+  /* Now that we know input colorspace, fix colorspace-dependent defaults */
+  jpeg_default_colorspace(&cinfo);
+
+  /* Adjust default compression parameters by re-parsing the options */
+  file_index = parse_switches(&cinfo, argc, argv, old_file_index, TRUE);
+
+  /* If user didn't supply -outfile switch, select output file name. */
+  if (outfilename == NULL) {
+    size_t len = strlen(infilename);
+    size_t pos;
+
+    /* The name is built in a fixed-size buffer, so refuse anything that
+     * cannot fit rather than writing past the end of workfilename.
+     */
+    if (len >= sizeof(workfilename)) {
+      fprintf(stderr, "%s: file name %s is too long\n", progname, infilename);
+      goto fail;
+    }
+    strcpy(workfilename, infilename);
+
+    /* Make outfilename be infilename with .jpg substituted for extension:
+     * scan backwards for the last '.', but stop at a directory or drive
+     * separator so that a dot in a directory name is left alone.
+     */
+    for (pos = len; pos > 0; pos--) {
+      char ch = workfilename[pos-1];
+
+      if (ch == ':' || ch == '/' || ch == '\\')
+       break;                  /* stop scanning */
+      if (ch == '.') {
+       workfilename[pos-1] = '\0';     /* lop off existing extension */
+       len = pos-1;
+       break;                  /* stop scanning */
+      }
+    }
+
+    /* sizeof(".jpg") counts the terminating NUL as well. */
+    if (len + sizeof(".jpg") > sizeof(workfilename)) {
+      fprintf(stderr, "%s: file name %s is too long\n", progname, infilename);
+      goto fail;
+    }
+    strcat(workfilename, ".jpg");
+    outfilename = workfilename;
+  }
+
+  fprintf(stderr, "Compressing %s => %s\n", infilename, outfilename);
+#ifndef NO_OVERWRITE_CHECK
+  if (! is_write_ok(outfilename))
+    goto fail;
+#endif
+
+  /* Open the output file. */
+  if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
+    fprintf(stderr, "%s: can't create %s\n", progname, outfilename);
+    goto fail;
+  }
+
+  /* Specify data destination for compression */
+  jpeg_stdio_dest(&cinfo, output_file);
+
+  /* Start compressor */
+  jpeg_start_compress(&cinfo, TRUE);
+
+  /* Process data */
+  while (cinfo.next_scanline < cinfo.image_height) {
+    num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr);
+    (void) jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines);
+  }
+
+  /* Finish compression and release memory */
+  (*src_mgr->finish_input) (&cinfo, src_mgr);
+  jpeg_finish_compress(&cinfo);
+
+  /* Clean up and exit */
+  /* Both the normal path and every "skip this file" path arrive here. */
+fail:
+  jpeg_destroy_compress(&cinfo);
+
+  if (input_file != NULL) fclose(input_file);
+  if (output_file != NULL) fclose(output_file);
+
+#ifdef PROGRESS_REPORT
+  end_progress_monitor((j_common_ptr) &cinfo);
+#endif
+
+  /* Disable signal catcher. */
+#ifdef NEED_SIGNAL_CATCHER
+  enable_signal_catcher((j_common_ptr) NULL);
+#endif
+
+  return file_index;
+}
+
+
+/*
+ * The main program.
+ */
+
+int
+main (int argc, char **argv)
+/* Program entry point: set up progname and the default memory limit, then
+ * hand the command line to process_one_file() once per input file.
+ */
+{
+  int last_done;               /* argv[] index of the file handled last */
+
+  /* On Mac, fetch a command line. */
+#ifdef USE_CCOMMAND
+  argc = ccommand(&argv);
+#endif
+
+  /* Choose the name used in diagnostics. */
+#ifdef MSDOS
+  progname = "cjpeg";          /* DOS tends to be too verbose about argv[0] */
+#else
+  progname = argv[0];
+  if (! (progname != NULL && progname[0] != 0))
+    progname = "cjpeg";                /* in case C library doesn't provide it */
+#endif
+
+  /* The default maxmem must be computed only once at program startup,
+   * since releasing memory with free() won't give it back to the OS.
+   */
+#ifdef FREE_MEM_ESTIMATE
+  default_maxmem = FREE_MEM_ESTIMATE;
+#else
+  default_maxmem = 0;
+#endif
+
+  /* An empty command line earns the usage message. */
+  if (argc < 2)
+    usage();
+
+  /* Each call parses the switches and compresses the next file, returning
+   * that file's index; keep going until the last argument has been used.
+   */
+  for (last_done = 0; last_done < argc-1; )
+    last_done = process_one_file(argc, argv, last_done);
+
+  /* All done. */
+  exit(EXIT_SUCCESS);
+  return 0;                    /* suppress no-return-value warnings */
+}
diff --git a/altui/djpeg.c b/altui/djpeg.c
new file mode 100644 (file)
index 0000000..a000d45
--- /dev/null
@@ -0,0 +1,836 @@
+/*
+ * alternate djpeg.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 6, 2006
+ * ---------------------------------------------------------------------
+ *
+ * This file contains an alternate user interface for the JPEG decompressor.
+ * One or more input files are named on the command line, and output file
+ * names are created by substituting an appropriate extension.
+ */
+
+#include "cdjpeg.h"            /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"          /* for version message */
+
+#include <ctype.h>             /* to declare isprint() */
+
+#ifdef USE_CCOMMAND            /* command-line reader for Macintosh */
+#ifdef __MWERKS__
+#include <SIOUX.h>              /* Metrowerks needs this */
+#include <console.h>           /* ... and this */
+#endif
+#ifdef THINK_C
+#include <console.h>           /* Think declares it here */
+#endif
+#endif
+
+/* Fallback pathname-length limit, used only when none of the headers
+ * included above has already defined PATH_MAX.  It sizes workfilename[]
+ * in process_one_file().
+ */
+#ifndef PATH_MAX               /* maximum-pathname-length constant */
+#define PATH_MAX 256
+#endif
+
+
+/* Create the add-on message string table.
+ * JMESSAGE is defined here so that any JMESSAGE(code,string) entry pulled in
+ * by the #include of cderror.h expands to just its string followed by a
+ * comma; the array is then closed with a NULL entry.
+ * process_one_file() installs the table as jerr.addon_message_table.
+ */
+
+#define JMESSAGE(code,string)  string ,
+
+static const char * const cdjpeg_message_table[] = {
+#include "cderror.h"
+  NULL
+};
+
+
+/*
+ * SIMD Ext: compiler-specific hacks to enable filename wild-card expansion
+ */
+
+#ifdef _MSC_VER                /* Microsoft Visual C++ */
+/* from setargv.c (setargv.obj) */
+/* Tested under Visual C++ V6.0, Toolkit 2003, and 2005 Express Edition */
+/* Defines _setargv() as a thin forwarder to __setargv().
+ * NOTE(review): presumably this replaces the runtime's default argument
+ * setup with the wildcard-expanding one -- confirm against the CRT docs.
+ */
+int __cdecl _setargv(void) { int __cdecl __setargv(void); return __setargv(); }
+#endif
+#ifdef __BORLANDC__    /* Borland C++ */
+/* from wildargs.c (wildargs.obj) */
+/* Tested under Borland C++ Compiler 5.5 (win32) */
+#include <wildargs.h>
+/* Define _argv_expand_ptr so that it points at _expand_wild.
+ * NOTE(review): presumably the Borland startup code calls through this
+ * pointer to expand wildcards in argv -- confirm against the RTL sources.
+ */
+typedef void _RTLENTRY (* _RTLENTRY _argv_expand_fnc)(char *, _PFN_ADDARG);
+_argv_expand_fnc _argv_expand_ptr = _expand_wild;
+#endif
+
+
+/*
+ * Automatic determination of available memory.
+ */
+
+static long default_maxmem;    /* saves value determined at startup, or 0 */
+
+#ifndef FREE_MEM_ESTIMATE      /* may be defined from command line */
+
+#ifdef MSDOS                   /* For MS-DOS (unless flat-memory model) */
+
+#include <dos.h>               /* for access to intdos() call */
+
+LOCAL(long)
+unused_dos_memory (void)
+/* Report, in bytes, how much DOS memory is currently unallocated */
+{
+  union REGS r;
+  long free_paragraphs;
+
+  /* Call DOS function 48h (Allocate Memory Block) asking for 0xFFFF
+   * paragraphs, which is more memory than DOS can have.  The request fails
+   * and DOS leaves the number of paragraphs actually available in BX.
+   */
+  r.x.bx = 0xFFFF;
+  r.h.ah = 0x48;
+  (void) intdos(&r, &r);
+  free_paragraphs = (unsigned int) r.x.bx;
+
+  /* Paragraphs times 16 gives bytes. */
+  return free_paragraphs << 4;
+}
+
+/* The default memory setting is 95% of the available space. */
+#define FREE_MEM_ESTIMATE  ((unused_dos_memory() * 95L) / 100L)
+
+#endif /* MSDOS */
+
+#ifdef ATARI                   /* For Atari ST/STE/TT, Pure C or Turbo C */
+
+#include <ext.h>
+
+/* The default memory setting is 90% of the available space. */
+#define FREE_MEM_ESTIMATE  (((long) coreleft() * 90L) / 100L)
+
+#endif /* ATARI */
+
+/* Add memory-estimation procedures for other operating systems here,
+ * with appropriate #ifdef's around them.
+ */
+
+#endif /* !FREE_MEM_ESTIMATE */
+
+
+/*
+ * This list defines the known output image formats
+ * (not all of which need be supported by a given version).
+ * You can change the default output format by defining DEFAULT_FMT;
+ * indeed, you had better do so if you undefine GIF_SUPPORTED, because
+ * GIF is the default format chosen below.
+ */
+
+typedef enum {
+       FMT_BMP,                /* BMP format (Windows flavor) */
+       FMT_GIF,                /* GIF format */
+       FMT_OS2,                /* BMP format (OS/2 flavor) */
+       FMT_PPM,                /* PPM/PGM (PBMPLUS formats) */
+       FMT_RLE,                /* RLE format */
+       FMT_TARGA,              /* Targa format */
+       FMT_TIFF                /* TIFF format */
+                               /* (no switch in parse_switches() selects it and
+                                * process_one_file() has no case for it) */
+} IMAGE_FORMATS;
+
+#ifndef DEFAULT_FMT            /* so can override from CFLAGS in Makefile */
+#define DEFAULT_FMT    FMT_GIF
+#endif
+
+/* Output format for the file being processed.  parse_switches() resets it
+ * to DEFAULT_FMT on every call and then applies any format switches;
+ * process_one_file() reads it to pick the output module.
+ */
+static IMAGE_FORMATS requested_fmt;
+
+
+/*
+ * Argument-parsing code.
+ * The switch parser is designed to be useful with DOS-style command line
+ * syntax, ie, intermixed switches and file names, where only the switches
+ * to the left of a given file name affect processing of that file.
+ */
+
+
+static const char * progname;  /* program name for error messages */
+static char * outfilename;     /* for -outfile switch */
+
+
+LOCAL(void)
+usage (void)
+/* Complain about a bad command line: print the switch summary to stderr
+ * and terminate the program with EXIT_FAILURE.  This routine never returns.
+ * Each line is printed only if the matching feature is compiled in, so the
+ * list mirrors what parse_switches() can actually act upon.
+ */
+{
+  fprintf(stderr, "usage: %s [switches] inputfile(s)\n", progname);
+  fprintf(stderr, "List of input files may use wildcards (* and ?)\n");
+  fprintf(stderr, "Output filename is same as input filename except for extension\n");
+
+  fprintf(stderr, "Switches (names may be abbreviated):\n");
+  fprintf(stderr, "  -colors N      Reduce image to no more than N colors\n");
+  fprintf(stderr, "  -fast          Fast, low-quality processing\n");
+  fprintf(stderr, "  -grayscale     Force grayscale output\n");
+#ifdef IDCT_SCALING_SUPPORTED
+  fprintf(stderr, "  -scale M/N     Scale output image by fraction M/N, eg, 1/8\n");
+#endif
+  /* Output format switches; the one equal to DEFAULT_FMT is tagged. */
+#ifdef BMP_SUPPORTED
+  fprintf(stderr, "  -bmp           Select BMP output format (Windows style)%s\n",
+         (DEFAULT_FMT == FMT_BMP ? " (default)" : ""));
+#endif
+#ifdef GIF_SUPPORTED
+  fprintf(stderr, "  -gif           Select GIF output format%s\n",
+         (DEFAULT_FMT == FMT_GIF ? " (default)" : ""));
+#endif
+#ifdef BMP_SUPPORTED
+  fprintf(stderr, "  -os2           Select BMP output format (OS/2 style)%s\n",
+         (DEFAULT_FMT == FMT_OS2 ? " (default)" : ""));
+#endif
+#ifdef PPM_SUPPORTED
+  fprintf(stderr, "  -pnm           Select PBMPLUS (PPM/PGM) output format%s\n",
+         (DEFAULT_FMT == FMT_PPM ? " (default)" : ""));
+#endif
+#ifdef RLE_SUPPORTED
+  fprintf(stderr, "  -rle           Select Utah RLE output format%s\n",
+         (DEFAULT_FMT == FMT_RLE ? " (default)" : ""));
+#endif
+#ifdef TARGA_SUPPORTED
+  fprintf(stderr, "  -targa         Select Targa output format%s\n",
+         (DEFAULT_FMT == FMT_TARGA ? " (default)" : ""));
+#endif
+  fprintf(stderr, "Switches for advanced users:\n");
+#ifdef DCT_ISLOW_SUPPORTED
+  fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
+         (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  fprintf(stderr, "  -dct fast      Use fast integer DCT (less accurate)%s\n",
+         (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  fprintf(stderr, "  -dct float     Use floating-point DCT method%s\n",
+         (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+#endif
+  fprintf(stderr, "  -dither fs     Use F-S dithering (default)\n");
+  fprintf(stderr, "  -dither none   Don't use dithering in quantization\n");
+  fprintf(stderr, "  -dither ordered  Use ordered dither (medium speed, quality)\n");
+#ifdef QUANT_2PASS_SUPPORTED
+  fprintf(stderr, "  -map FILE      Map to colors used in named image file\n");
+#endif
+  fprintf(stderr, "  -nosmooth      Don't use high-quality upsampling\n");
+  /* SIMD Ext: parse_switches() accepts these whenever the mask function is
+   * available, so list them under the same condition.
+   */
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+  fprintf(stderr, "  -nosimd        Don't use any SIMD instructions\n");
+  fprintf(stderr, "  -nommx         Don't use MMX instructions\n");
+  fprintf(stderr, "  -no3dnow       Don't use 3DNow! instructions\n");
+  fprintf(stderr, "  -nosse         Don't use SSE instructions\n");
+  fprintf(stderr, "  -nosse2        Don't use SSE2 instructions\n");
+#endif
+#ifdef QUANT_1PASS_SUPPORTED
+  fprintf(stderr, "  -onepass       Use 1-pass quantization (fast, low quality)\n");
+#endif
+#ifndef FREE_MEM_ESTIMATE
+  fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
+#endif
+  fprintf(stderr, "  -outfile name  Specify name for output file\n");
+  fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
+  exit(EXIT_FAILURE);
+}
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+LOCAL(void)
+print_simd_info (FILE * file, const char * labelstr, unsigned int simd)
+/* Write one line to file: labelstr, then the name of every instruction set
+ * whose JSIMD_* bit is set in simd, or " NONE" when simd equals JSIMD_NONE.
+ * labelstr is only read (callers pass string literals), hence the const.
+ */
+{
+  fprintf(file, "%s%s%s%s%s%s\n", labelstr,
+         simd & JSIMD_MMX   ? " MMX"    : "",
+         simd & JSIMD_3DNOW ? " 3DNow!" : "",
+         simd & JSIMD_SSE   ? " SSE"    : "",
+         simd & JSIMD_SSE2  ? " SSE2"   : "",
+         simd == JSIMD_NONE ? " NONE"   : "");
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
+
+
+LOCAL(int)
+parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
+               int last_file_arg_seen, boolean for_real)
+/* Parse optional switches.
+ * Returns argv[] index of first file-name argument (== argc if none).
+ * Any file names with indexes <= last_file_arg_seen are ignored;
+ * they have presumably been processed in a previous iteration.
+ * (Pass 0 for last_file_arg_seen on the first or only iteration.)
+ * for_real is FALSE on the first (dummy) pass; we may skip any expensive
+ * processing.
+ *
+ * Side effects: the file-level variables requested_fmt and outfilename are
+ * reset on every call and then updated from the switches seen, and the
+ * decompression parameters in *cinfo are modified in place.
+ * Any malformed or unknown switch ends the program through usage().
+ */
+{
+  int argn;
+  char * arg;
+
+  /* Set up default JPEG parameters. */
+  requested_fmt = DEFAULT_FMT; /* set default output file format */
+  outfilename = NULL;
+  cinfo->err->trace_level = 0;
+  if (default_maxmem > 0)      /* override library's default value */
+    cinfo->mem->max_memory_to_use = default_maxmem;
+
+  /* Scan command line options, adjust parameters */
+
+  for (argn = 1; argn < argc; argn++) {
+    arg = argv[argn];
+    if (*arg != '-') {
+      /* Not a switch, must be a file name argument */
+      if (argn <= last_file_arg_seen) {
+       outfilename = NULL;     /* -outfile applies to just one input file */
+       continue;               /* ignore this name if previously processed */
+      }
+      break;                   /* else done parsing switches */
+    }
+    arg++;                     /* advance past switch marker character */
+
+    /* The third keymatch() argument differs per switch.
+     * NOTE(review): presumably it is the minimum number of characters the
+     * user must type -- confirm against keymatch() in cdjpeg.c.
+     */
+    if (keymatch(arg, "bmp", 1)) {
+      /* BMP output format. */
+      requested_fmt = FMT_BMP;
+
+    } else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) ||
+              keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) {
+      /* Do color quantization. */
+      int val;
+
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (sscanf(argv[argn], "%d", &val) != 1)
+       usage();
+      cinfo->desired_number_of_colors = val;
+      cinfo->quantize_colors = TRUE;
+
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+    /* SIMD Ext: each switch hands one JSIMD_* flag set to jpeg_simd_mask().
+     * NOTE(review): presumably this disables the named instruction set(s)
+     * for this decompressor -- confirm against jpeg_simd_mask().
+     */
+    } else if (keymatch(arg, "nosimd" , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL);
+    } else if (keymatch(arg, "nommx"  , 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX);
+    } else if (keymatch(arg, "no3dnow", 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW);
+    } else if (keymatch(arg, "nosse"  , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE);
+    } else if (keymatch(arg, "nosse2" , 6)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2);
+#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */
+
+    } else if (keymatch(arg, "dct", 2)) {
+      /* Select IDCT algorithm. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (keymatch(argv[argn], "int", 1)) {
+       cinfo->dct_method = JDCT_ISLOW;
+      } else if (keymatch(argv[argn], "fast", 2)) {
+       cinfo->dct_method = JDCT_IFAST;
+      } else if (keymatch(argv[argn], "float", 2)) {
+       cinfo->dct_method = JDCT_FLOAT;
+      } else
+       usage();
+
+    } else if (keymatch(arg, "dither", 2)) {
+      /* Select dithering algorithm. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (keymatch(argv[argn], "fs", 2)) {
+       cinfo->dither_mode = JDITHER_FS;
+      } else if (keymatch(argv[argn], "none", 2)) {
+       cinfo->dither_mode = JDITHER_NONE;
+      } else if (keymatch(argv[argn], "ordered", 2)) {
+       cinfo->dither_mode = JDITHER_ORDERED;
+      } else
+       usage();
+
+    } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
+      /* Enable debug printouts. */
+      /* On first -d, print version identification */
+      /* printed_version is static, so the banner appears once per program
+       * run even though this routine is called repeatedly.
+       */
+      static boolean printed_version = FALSE;
+
+      if (! printed_version) {
+       fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n",
+               JVERSION, JCOPYRIGHT);
+       fprintf(stderr,
+               "\nx86 SIMD extension for IJG JPEG library, version %s\n\n",
+               JPEG_SIMDEXT_VER_STR);
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+       print_simd_info(stderr, "SIMD instructions supported by the system :",
+                       jpeg_simd_support(NULL));
+
+       fprintf(stderr, "\n      === SIMD Operation Modes ===\n");
+#ifdef DCT_ISLOW_SUPPORTED
+       print_simd_info(stderr, "Accurate integer DCT  (-dct int)   :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_ISLOW));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+       print_simd_info(stderr, "Fast integer DCT      (-dct fast)  :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_IFAST));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+       print_simd_info(stderr, "Floating-point DCT    (-dct float) :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT));
+#endif
+#ifdef IDCT_SCALING_SUPPORTED
+       /* NOTE(review): JDCT_FLOAT+1 is not a named DCT method; it looks like
+        * the value used to ask about the reduced-size IDCT -- confirm
+        * against jpeg_simd_inverse_dct().
+        */
+       print_simd_info(stderr, "Reduced-size DCT      (-scale M/N) :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT+1));
+#endif
+       print_simd_info(stderr, "High-quality upsampling (default)  :",
+                       jpeg_simd_upsampler(cinfo, TRUE));
+       print_simd_info(stderr, "Low-quality upsampling (-nosmooth) :",
+                       jpeg_simd_upsampler(cinfo, FALSE));
+       print_simd_info(stderr, "Colorspace conversion (YCbCr->RGB) :",
+                       jpeg_simd_color_deconverter(cinfo));
+       fprintf(stderr, "\n");
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
+       printed_version = TRUE;
+      }
+      /* Every -debug/-verbose on the command line raises the level by one. */
+      cinfo->err->trace_level++;
+
+    } else if (keymatch(arg, "fast", 1)) {
+      /* Select recommended processing options for quick-and-dirty output. */
+      cinfo->two_pass_quantize = FALSE;
+      cinfo->dither_mode = JDITHER_ORDERED;
+      if (! cinfo->quantize_colors) /* don't override an earlier -colors */
+       cinfo->desired_number_of_colors = 216;
+      cinfo->dct_method = JDCT_FASTEST;
+      cinfo->do_fancy_upsampling = FALSE;
+
+    } else if (keymatch(arg, "gif", 1)) {
+      /* GIF output format. */
+      requested_fmt = FMT_GIF;
+
+    } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) {
+      /* Force monochrome output. */
+      cinfo->out_color_space = JCS_GRAYSCALE;
+
+    } else if (keymatch(arg, "map", 3)) {
+      /* Quantize to a color map taken from an input file. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      /* The file name argument is consumed on both passes, but the map file
+       * is only opened and read on the real pass.
+       */
+      if (for_real) {          /* too expensive to do twice! */
+#ifdef QUANT_2PASS_SUPPORTED   /* otherwise can't quantize to supplied map */
+       FILE * mapfile;
+
+       if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) {
+         fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
+         exit(EXIT_FAILURE);
+       }
+       read_color_map(cinfo, mapfile);
+       fclose(mapfile);
+       cinfo->quantize_colors = TRUE;
+#else
+       ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+      }
+
+    } else if (keymatch(arg, "maxmemory", 3)) {
+      /* Maximum memory in Kb (or Mb with 'm'). */
+      /* The number is scaled by 1000 (not 1024); an 'm' or 'M' suffix
+       * scales it by a further 1000.
+       */
+      long lval;
+      char ch = 'x';
+
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
+       usage();
+      if (ch == 'm' || ch == 'M')
+       lval *= 1000L;
+      cinfo->mem->max_memory_to_use = lval * 1000L;
+
+    } else if (keymatch(arg, "nosmooth", 3)) {
+      /* Suppress fancy upsampling */
+      cinfo->do_fancy_upsampling = FALSE;
+
+    } else if (keymatch(arg, "onepass", 3)) {
+      /* Use fast one-pass quantization. */
+      cinfo->two_pass_quantize = FALSE;
+
+    } else if (keymatch(arg, "os2", 3)) {
+      /* BMP output format (OS/2 flavor). */
+      requested_fmt = FMT_OS2;
+
+    } else if (keymatch(arg, "outfile", 4)) {
+      /* Set output file name. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      outfilename = argv[argn];        /* save it away for later use */
+
+    } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) {
+      /* PPM/PGM output format. */
+      requested_fmt = FMT_PPM;
+
+    } else if (keymatch(arg, "rle", 1)) {
+      /* RLE output format. */
+      requested_fmt = FMT_RLE;
+
+    } else if (keymatch(arg, "scale", 1)) {
+      /* Scale the output image by a fraction M/N. */
+      if (++argn >= argc)      /* advance to next argument */
+       usage();
+      if (sscanf(argv[argn], "%d/%d",
+                &cinfo->scale_num, &cinfo->scale_denom) != 2)
+       usage();
+
+    } else if (keymatch(arg, "targa", 1)) {
+      /* Targa output format. */
+      requested_fmt = FMT_TARGA;
+
+    } else {
+      usage();                 /* bogus switch */
+    }
+  }
+
+  return argn;                 /* return index of next arg (file name) */
+}
+
+
+/*
+ * Marker processor for COM and interesting APPn markers.
+ * This replaces the library's built-in processor, which just skips the marker.
+ * We want to print out the marker as text, to the extent possible.
+ * Note this code relies on a non-suspending data source.
+ */
+
+LOCAL(unsigned int)
+jpeg_getc (j_decompress_ptr cinfo)
+/* Fetch the next byte from the decompressor's data source */
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+  unsigned int value;
+
+  /* Refill the buffer once it is empty.  This code relies on a
+   * non-suspending source, so a refill that fails is reported as an error.
+   */
+  if (src->bytes_in_buffer == 0 && ! (*src->fill_input_buffer) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Take one byte and account for it. */
+  value = GETJOCTET(*src->next_input_byte);
+  src->next_input_byte++;
+  src->bytes_in_buffer--;
+  return value;
+}
+
+
+METHODDEF(boolean)
+print_text_marker (j_decompress_ptr cinfo)
+/* Marker processor installed for COM and APP12: consumes the whole marker
+ * and, when the trace level is at least 1, echoes its contents to stderr
+ * as text.  Always returns TRUE.
+ */
+{
+  boolean show = (cinfo->err->trace_level >= 1);
+  INT32 remaining;
+  unsigned int c;
+  unsigned int prev = 0;
+
+  /* Two-byte length, high byte first; it counts the length word itself. */
+  remaining = jpeg_getc(cinfo) << 8;
+  remaining += jpeg_getc(cinfo);
+  remaining -= 2;
+
+  if (show) {
+    if (cinfo->unread_marker == JPEG_COM) {
+      fprintf(stderr, "Comment, length %ld:\n", (long) remaining);
+    } else {
+      /* anything else is taken to be an APPn marker */
+      fprintf(stderr, "APP%d, length %ld:\n",
+             cinfo->unread_marker - JPEG_APP0, (long) remaining);
+    }
+  }
+
+  /* The marker body must be read even when nothing is printed. */
+  for (; remaining > 0; remaining--) {
+    c = jpeg_getc(cinfo);
+    if (! show)
+      continue;
+
+    /* Print the character readably: nonprintables become \nnn, a backslash
+     * becomes \\, and CR, CR/LF or LF each yield exactly one newline.
+     */
+    switch (c) {
+    case '\r':
+      fprintf(stderr, "\n");
+      break;
+    case '\n':
+      if (prev != '\r')        /* the CR of a CR/LF pair already ended the line */
+       fprintf(stderr, "\n");
+      break;
+    case '\\':
+      fprintf(stderr, "\\\\");
+      break;
+    default:
+      if (isprint(c))
+       putc(c, stderr);
+      else
+       fprintf(stderr, "\\%03o", c);
+      break;
+    }
+    prev = c;
+  }
+
+  if (show)
+    fprintf(stderr, "\n");
+
+  return TRUE;
+}
+
+
+/*
+ * Check for overwrite of an existing file; clear it with user
+ */
+
+#ifndef NO_OVERWRITE_CHECK
+
+LOCAL(boolean)
+is_write_ok (char * outfname)
+/* Decide whether outfname may be written.
+ * Returns TRUE if the file cannot be opened for reading (taken to mean it
+ * does not exist) or if the user answers 'y'/'Y' to the overwrite prompt.
+ * Returns FALSE if the user answers 'n'/'N', or if stdin reaches
+ * end-of-file before an answer is read, so a closed or exhausted stdin
+ * never overwrites a file and never hangs the program.
+ * Any other answer repeats the prompt.
+ */
+{
+  FILE * ofile;
+  int ch;
+  int skipped;
+
+  ofile = fopen(outfname, READ_BINARY);
+  if (ofile == NULL)
+    return TRUE;               /* not present */
+  fclose(ofile);               /* oops, it is present */
+
+  for (;;) {
+    fprintf(stderr, "%s already exists, overwrite it? [y/n] ",
+           outfname);
+    fflush(stderr);
+    ch = getc(stdin);
+    if (ch == EOF)
+      return FALSE;            /* no answer can ever arrive; don't overwrite */
+    if (ch != '\n') {          /* flush rest of line */
+      /* Stop at end-of-file as well as at newline; without the EOF test
+       * this loop would spin forever on a truncated last line.
+       */
+      do {
+       skipped = getc(stdin);
+      } while (skipped != '\n' && skipped != EOF);
+    }
+
+    switch (ch) {
+    case 'Y':
+    case 'y':
+      return TRUE;
+    case 'N':
+    case 'n':
+      return FALSE;
+    /* otherwise, ask again */
+    }
+  }
+}
+
+#endif
+
+
+/*
+ * Process a single input file name, and return its index in argv[].
+ * File names at or to left of old_file_index have been processed already.
+ */
+
+LOCAL(int)
+process_one_file (int argc, char **argv, int old_file_index)
+/* Decompress the first input file named to the right of argv[old_file_index].
+ * Returns the argv[] index of the file name that was handled.  A file that
+ * cannot be opened, whose output the user declines to overwrite, or whose
+ * derived output name would not fit in the work buffer is skipped: a message
+ * is printed (where applicable) and its index is still returned so the
+ * caller moves on to the next file.
+ * A command line with no further file name ends the program via usage().
+ */
+{
+  struct jpeg_decompress_struct cinfo;
+  struct jpeg_error_mgr jerr;
+  char *infilename;
+  char workfilename[PATH_MAX];
+  const char *default_extension = NULL;
+#ifdef PROGRESS_REPORT
+  struct cdjpeg_progress_mgr progress;
+#endif
+  int file_index;
+  djpeg_dest_ptr dest_mgr = NULL;
+  FILE * input_file = NULL;
+  FILE * output_file = NULL;
+  JDIMENSION num_scanlines;
+
+  /* Initialize the JPEG decompression object with default error handling. */
+  cinfo.err = jpeg_std_error(&jerr);
+  jpeg_create_decompress(&cinfo);
+  /* Add some application-specific error messages (from cderror.h) */
+  jerr.addon_message_table = cdjpeg_message_table;
+  jerr.first_addon_message = JMSG_FIRSTADDONCODE;
+  jerr.last_addon_message = JMSG_LASTADDONCODE;
+
+  /* Insert custom marker processor for COM and APP12.
+   * APP12 is used by some digital camera makers for textual info,
+   * so we provide the ability to display it as text.
+   * If you like, additional APPn marker types can be selected for display,
+   * but don't try to override APP0 or APP14 this way (see libjpeg.doc).
+   */
+  jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker);
+  jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker);
+
+  /* Now safe to enable signal catcher. */
+#ifdef NEED_SIGNAL_CATCHER
+  enable_signal_catcher((j_common_ptr) &cinfo);
+#endif
+
+  /* Scan command line to find next file name.
+   * It is convenient to use just one switch-parsing routine, but the switch
+   * values read here are ignored; we will rescan the switches after opening
+   * the input file.
+   * (Exception: tracing level set here controls verbosity for COM markers
+   * found during jpeg_read_header...)
+   */
+
+  file_index = parse_switches(&cinfo, argc, argv, old_file_index, FALSE);
+  if (file_index >= argc) {
+    fprintf(stderr, "%s: missing input file name\n", progname);
+    usage();
+  }
+
+  /* Open the input file. */
+  infilename = argv[file_index];
+  if ((input_file = fopen(infilename, READ_BINARY)) == NULL) {
+    fprintf(stderr, "%s: can't open %s\n", progname, infilename);
+    goto fail;
+  }
+
+#ifdef PROGRESS_REPORT
+  start_progress_monitor((j_common_ptr) &cinfo, &progress);
+#endif
+
+  /* Specify data source for decompression */
+  jpeg_stdio_src(&cinfo, input_file);
+
+  /* Read file header, set default decompression parameters */
+  (void) jpeg_read_header(&cinfo, TRUE);
+
+  /* Adjust default decompression parameters by re-parsing the options */
+  file_index = parse_switches(&cinfo, argc, argv, old_file_index, TRUE);
+
+  /* Initialize the output module now to let it override any crucial
+   * option settings (for instance, GIF wants to force color quantization).
+   */
+  switch (requested_fmt) {
+#ifdef BMP_SUPPORTED
+  case FMT_BMP:
+    dest_mgr = jinit_write_bmp(&cinfo, FALSE);
+    default_extension = ".bmp";
+    break;
+  case FMT_OS2:
+    dest_mgr = jinit_write_bmp(&cinfo, TRUE);
+    default_extension = ".bmp";
+    break;
+#endif
+#ifdef GIF_SUPPORTED
+  case FMT_GIF:
+    dest_mgr = jinit_write_gif(&cinfo);
+    default_extension = ".gif";
+    break;
+#endif
+#ifdef PPM_SUPPORTED
+  case FMT_PPM:
+    dest_mgr = jinit_write_ppm(&cinfo);
+    default_extension = ".ppm";
+    break;
+#endif
+#ifdef RLE_SUPPORTED
+  case FMT_RLE:
+    dest_mgr = jinit_write_rle(&cinfo);
+    default_extension = ".rle";
+    break;
+#endif
+#ifdef TARGA_SUPPORTED
+  case FMT_TARGA:
+    dest_mgr = jinit_write_targa(&cinfo);
+    default_extension = ".tga";
+    break;
+#endif
+  default:
+    ERREXIT(&cinfo, JERR_UNSUPPORTED_FORMAT);
+    break;
+  }
+
+  /* If user didn't supply -outfile switch, select output file name. */
+  if (outfilename == NULL) {
+    int i;
+
+    /* The input name is copied into the fixed-size work buffer below, so
+     * refuse names that would not fit rather than overrun the buffer.
+     */
+    if (strlen(infilename) >= sizeof(workfilename)) {
+      fprintf(stderr, "%s: file name too long: %s\n", progname, infilename);
+      goto fail;
+    }
+    outfilename = workfilename;
+    /* Make outfilename be infilename with appropriate extension */
+    strcpy(outfilename, infilename);
+    /* Scan backwards for the extension, but stop at the first directory or
+     * drive separator so that a '.' in a directory name is left alone.
+     */
+    for (i = strlen(outfilename)-1; i >= 0; i--) {
+      switch (outfilename[i]) {
+      case ':':
+      case '/':
+      case '\\':
+       i = 0;                  /* stop scanning */
+       break;
+      case '.':
+       outfilename[i] = '\0';  /* lop off existing extension */
+       i = 0;                  /* stop scanning */
+       break;
+      default:
+       break;                  /* keep scanning */
+      }
+    }
+    /* The new extension may be longer than the one removed; check again. */
+    if (strlen(outfilename) + strlen(default_extension)
+       >= sizeof(workfilename)) {
+      fprintf(stderr, "%s: file name too long: %s\n", progname, infilename);
+      goto fail;
+    }
+    strcat(outfilename, default_extension);
+  }
+
+  fprintf(stderr, "Decompressing %s => %s\n", infilename, outfilename);
+#ifndef NO_OVERWRITE_CHECK
+  if (! is_write_ok(outfilename))
+    goto fail;
+#endif
+
+  /* Open the output file. */
+  if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
+    fprintf(stderr, "%s: can't create %s\n", progname, outfilename);
+    goto fail;
+  }
+  dest_mgr->output_file = output_file;
+
+  /* Start decompressor */
+  (void) jpeg_start_decompress(&cinfo);
+
+  /* Write output file header */
+  (*dest_mgr->start_output) (&cinfo, dest_mgr);
+
+  /* Process data */
+  while (cinfo.output_scanline < cinfo.output_height) {
+    num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
+                                       dest_mgr->buffer_height);
+    (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+  }
+
+#ifdef PROGRESS_REPORT
+  /* Hack: count final pass as done in case finish_output does an extra pass.
+   * The library won't have updated completed_passes.
+   */
+  progress.pub.completed_passes = progress.pub.total_passes;
+#endif
+
+  /* Finish decompression and release memory.
+   * I must do it in this order because output module has allocated memory
+   * of lifespan JPOOL_IMAGE; it needs to finish before releasing memory.
+   */
+  (*dest_mgr->finish_output) (&cinfo, dest_mgr);
+  (void) jpeg_finish_decompress(&cinfo);
+
+  /* Clean up and exit */
+  /* Both the normal path and every skipped-file path arrive here. */
+fail:
+  jpeg_destroy_decompress(&cinfo);
+
+  if (input_file != NULL) fclose(input_file);
+  if (output_file != NULL) fclose(output_file);
+
+#ifdef PROGRESS_REPORT
+  end_progress_monitor((j_common_ptr) &cinfo);
+#endif
+
+  /* Disable signal catcher. */
+#ifdef NEED_SIGNAL_CATCHER
+  enable_signal_catcher((j_common_ptr) NULL);
+#endif
+
+  return file_index;
+}
+
+
+/*
+ * The main program.
+ */
+
+int
+main (int argc, char **argv)
+{
+  int done_index = 0;          /* argv[] index of the last file handled so far */
+
+#ifdef USE_CCOMMAND
+  /* On Mac, the command line comes from ccommand(). */
+  argc = ccommand(&argv);
+#endif
+
+  /* Settle on the program name shown in messages. */
+#ifdef MSDOS
+  progname = "djpeg";          /* DOS tends to be too verbose about argv[0] */
+#else
+  progname = argv[0];
+  if (progname == NULL || *progname == 0) {
+    progname = "djpeg";                /* in case C library doesn't provide it */
+  }
+#endif
+
+  /* The default maxmem is worked out a single time, at startup, because
+   * memory released with free() is not handed back to the OS.
+   */
+#ifdef FREE_MEM_ESTIMATE
+  default_maxmem = FREE_MEM_ESTIMATE;
+#else
+  default_maxmem = 0;
+#endif
+
+  /* Nothing on the command line?  usage() prints the help and exits. */
+  if (argc < 2) {
+    usage();
+  }
+
+  /* Handle the input files left to right.  Each call processes the next
+   * file name after done_index and returns that file's argv[] index.
+   */
+  while (done_index < argc-1) {
+    done_index = process_one_file(argc, argv, done_index);
+  }
+
+  /* All done. */
+  exit(EXIT_SUCCESS);
+  return 0;                    /* suppress no-return-value warnings */
+}
diff --git a/altui/usage.alt b/altui/usage.alt
new file mode 100644 (file)
index 0000000..277332b
--- /dev/null
@@ -0,0 +1,62 @@
+(Most of the standard usage.doc file also applies to this alternate version,
+but replace its "GENERAL USAGE" section with the text below.  Edit the text
+as necessary if you don't support wildcards or overwrite checking.  Be sure
+to fix the djpeg switch descriptions if you are not defaulting to PPM output.
+Also, if you've provided an accurate memory-estimation procedure, you can
+probably eliminate the HINTS related to the -maxmemory switch.)
+
+
+GENERAL USAGE
+
+We provide two programs, cjpeg to compress an image file into JPEG format,
+and djpeg to decompress a JPEG file back into a conventional image format.
+
+The basic command line is:
+       cjpeg [switches] list of image files
+or
+       djpeg [switches] list of jpeg files
+
+Each file named is compressed or decompressed.  The input file(s) are not
+modified; the output data is written to files which have the same names
+except for extension.  cjpeg always uses ".jpg" for the output file name's
+extension; djpeg uses one of ".bmp", ".gif", ".ppm", ".rle", or ".tga",
+depending on what output format is selected by the switches.
+
+For example, to convert xxx.bmp to xxx.jpg and yyy.ppm to yyy.jpg, say:
+       cjpeg xxx.bmp yyy.ppm
+
+On most systems you can use standard wildcards to specify the list of input
+files; for example, on DOS "djpeg *.jpg" decompresses all the JPEG files in
+the current directory.
+
+If an intended output file already exists, you'll be asked whether or not to
+overwrite it.  If you say no, the program skips that input file and goes on
+to the next one.
+
+You can intermix switches and file names; for example
+       djpeg -gif file1.jpg -targa file2.jpg
+decompresses file1.jpg into GIF format (file1.gif) and file2.jpg into Targa
+format (file2.tga).  Only switches to the left of a given file name affect
+processing of that file; when there are conflicting switches, the rightmost
+one takes precedence.
+
+You can override the program's choice of output file name by using the
+-outfile switch, as in
+       cjpeg -outfile output.jpg input.ppm
+-outfile only affects the first input file name to its right.
+
+The currently supported image file formats are: PPM (PBMPLUS color format),
+PGM (PBMPLUS gray-scale format), BMP, GIF, Targa, and RLE (Utah Raster
+Toolkit format).  (RLE is supported only if the URT library is available,
+which it isn't on most non-Unix systems.)  cjpeg recognizes the input image
+format automatically, with the exception of some Targa-format files.  You
+have to tell djpeg which format to generate.
+
+JPEG files are in the de facto standard JFIF file format.  There are other,
+less widely used JPEG-based file formats, but we don't support them.
+
+All switch names may be abbreviated; for example, -grayscale may be written
+-gray or -gr.  Most of the "basic" switches can be abbreviated to as little as
+one letter.  Upper and lower case are equivalent (-BMP is the same as -bmp).
+British spellings are also accepted (e.g., -greyscale), though for brevity
+these are not mentioned below.
diff --git a/cjpeg.c b/cjpeg.c
index f2a929f0c9fa5c3f9b50b839604711cf4ca1074f..10f5f5b7a0d9c5c29de3b815281b5e49c78280fa 100644 (file)
--- a/cjpeg.c
+++ b/cjpeg.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : August 23, 2005
+ * ---------------------------------------------------------------------
+ *
  * This file contains a command-line user interface for the JPEG compressor.
  * It should work on any system with Unix- or MS-DOS-style command lines.
  *
@@ -195,6 +202,22 @@ usage (void)
 }
 
 
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+LOCAL(void)
+print_simd_info (FILE * file, const char * labelstr, unsigned int simd)
+/* Write one line to file: labelstr, then the name of every instruction set
+ * whose JSIMD_* bit is set in simd, or " NONE" when simd equals JSIMD_NONE.
+ * labelstr is only read (callers pass string literals), hence the const.
+ */
+{
+  fprintf(file, "%s%s%s%s%s%s\n", labelstr,
+         simd & JSIMD_MMX   ? " MMX"    : "",
+         simd & JSIMD_3DNOW ? " 3DNow!" : "",
+         simd & JSIMD_SSE   ? " SSE"    : "",
+         simd & JSIMD_SSE2  ? " SSE2"   : "",
+         simd == JSIMD_NONE ? " NONE"   : "");
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
+
+
 LOCAL(int)
 parse_switches (j_compress_ptr cinfo, int argc, char **argv,
                int last_file_arg_seen, boolean for_real)
@@ -258,6 +281,19 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
       /* Force baseline-compatible output (8-bit quantizer values). */
       force_baseline = TRUE;
 
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+    } else if (keymatch(arg, "nosimd" , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL);
+    } else if (keymatch(arg, "nommx"  , 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX);
+    } else if (keymatch(arg, "no3dnow", 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW);
+    } else if (keymatch(arg, "nosse"  , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE);
+    } else if (keymatch(arg, "nosse2" , 6)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2);
+#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */
+
     } else if (keymatch(arg, "dct", 2)) {
       /* Select DCT algorithm. */
       if (++argn >= argc)      /* advance to next argument */
@@ -279,6 +315,32 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
       if (! printed_version) {
        fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n",
                JVERSION, JCOPYRIGHT);
+       fprintf(stderr,
+               "\nx86 SIMD extension for IJG JPEG library, version %s\n\n",
+               JPEG_SIMDEXT_VER_STR);
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+       print_simd_info(stderr, "SIMD instructions supported by the system :",
+                       jpeg_simd_support(NULL));
+
+       fprintf(stderr, "\n      === SIMD Operation Modes ===\n");
+#ifdef DCT_ISLOW_SUPPORTED
+       print_simd_info(stderr, "Accurate integer DCT  (-dct int)   :",
+                       jpeg_simd_forward_dct(cinfo, JDCT_ISLOW));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+       print_simd_info(stderr, "Fast integer DCT      (-dct fast)  :",
+                       jpeg_simd_forward_dct(cinfo, JDCT_IFAST));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+       print_simd_info(stderr, "Floating-point DCT    (-dct float) :",
+                       jpeg_simd_forward_dct(cinfo, JDCT_FLOAT));
+#endif
+       print_simd_info(stderr, "Downsampling (-sample 2x2 or 2x1)  :",
+                       jpeg_simd_downsampler(cinfo));
+       print_simd_info(stderr, "Colorspace conversion (RGB->YCbCr) :",
+                       jpeg_simd_color_converter(cinfo));
+       fprintf(stderr, "\n");
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
index 34baf795b00957f4a48f28b0f5797fe9734d86a2..ba380dc6710c021229de098bddb1138292635ebb 100644 (file)
@@ -4,6 +4,13 @@
  * Copyright (C) 1991-1994, Thomas G. Lane.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
+ *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : March 28, 2005
+ * ---------------------------------------------------------------------
  */
 
 /*
@@ -361,6 +368,10 @@ int main (argc, argv)
   fprintf(outfile, "#define INCOMPLETE_TYPES_BROKEN\n");
 #else
   fprintf(outfile, "#undef INCOMPLETE_TYPES_BROKEN\n");
+#endif
+#ifdef _WIN32
+  fprintf(outfile, "\n/* Define \"boolean\" as unsigned char, not int, per Windows custom */\n");  /* inner quotes must be escaped or the literal ends early */
+  fprintf(outfile, "#define TYPEDEF_UCHAR_BOOLEAN\n");
 #endif
   fprintf(outfile, "\n#ifdef JPEG_INTERNALS\n\n");
   if (is_shifting_signed(-0x7F7E80B1L))
@@ -368,6 +379,14 @@ int main (argc, argv)
   else
     fprintf(outfile, "#define RIGHT_SHIFT_IS_UNSIGNED\n");
   fprintf(outfile, "\n#endif /* JPEG_INTERNALS */\n");
+
+  fprintf(outfile, "\n#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)\n");
+  fprintf(outfile, "#undef JSIMD_MMX_NOT_SUPPORTED\n");
+  fprintf(outfile, "#undef JSIMD_3DNOW_NOT_SUPPORTED\n");
+  fprintf(outfile, "#undef JSIMD_SSE_NOT_SUPPORTED\n");
+  fprintf(outfile, "#undef JSIMD_SSE2_NOT_SUPPORTED\n");
+  fprintf(outfile, "#endif\n");
+
   fprintf(outfile, "\n#ifdef JPEG_CJPEG_DJPEG\n\n");
   fprintf(outfile, "#define BMP_SUPPORTED              /* BMP image file format */\n");
   fprintf(outfile, "#define GIF_SUPPORTED              /* GIF image file format */\n");
@@ -375,6 +394,9 @@ int main (argc, argv)
   fprintf(outfile, "#undef RLE_SUPPORTED               /* Utah RLE image file format */\n");
   fprintf(outfile, "#define TARGA_SUPPORTED            /* Targa image file format */\n\n");
   fprintf(outfile, "#undef TWO_FILE_COMMANDLINE        /* You may need this on non-Unix systems */\n");
+#ifdef _WIN32
+  fprintf(outfile, "#define USE_SETMODE                /* Needed to make one-file style work */\n");
+#endif
   fprintf(outfile, "#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */\n");
   fprintf(outfile, "#undef DONT_USE_B_MODE\n");
   fprintf(outfile, "/* #define PROGRESS_REPORT */      /* optional */\n");
old mode 100755 (executable)
new mode 100644 (file)
index 413ed41..fb25fa4
@@ -1,7 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
-#   Copyright (C) 1992, 93, 94, 95, 96, 1997 Free Software Foundation, Inc.
-#
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+
+timestamp='2006-01-30'
+
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
 # configuration script generated by Autoconf, you may include it under
 # the same distribution terms that you use for the rest of that program.
 
-# Written by Per Bothner <bothner@cygnus.com>.
-# The master version of this file is at the FSF in /home/gd/gnu/lib.
+
+# Originally written by Per Bothner <per@bothner.com>.
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted ChangeLog entry.
 #
 # This script attempts to guess a canonical system name similar to
 # config.sub.  If it succeeds, it prints the system name on stdout, and
 # exits with 0.  Otherwise, it exits with 1.
 #
 # The plan is that this can be called by configure scripts if you
-# don't specify an explicit system type (host/target name).
-#
-# Only a few systems have been added to this list; please add others
-# (but try to keep the structure clean).
-#
+# don't specify an explicit build system type.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )        # Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )
+       break ;;
+  esac
+done
+
+if test $# != 0; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,)    echo "int x;" > $dummy.c ;
+       for c in cc gcc c89 c99 ; do
+         if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
+            CC_FOR_BUILD="$c"; break ;
+         fi ;
+       done ;
+       if test x"$CC_FOR_BUILD" = x ; then
+         CC_FOR_BUILD=no_compiler_found ;
+       fi
+       ;;
+ ,,*)   CC_FOR_BUILD=$CC ;;
+ ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
 
 # This is needed to find uname on a Pyramid OSx when run in the BSD universe.
-# (ghazi@noc.rutgers.edu 8/24/94.)
+# (ghazi@noc.rutgers.edu 1994-08-24)
 if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
        PATH=$PATH:/.attbin ; export PATH
 fi
 
 UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
 UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
-UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
 UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 
-trap 'rm -f dummy.c dummy.o dummy; exit 1' 1 2 15
-
 # Note: order is significant - the case branches are not exclusive.
 
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+    *:NetBSD:*:*)
+       # NetBSD (nbsd) targets should (where applicable) match one or
+       # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+       # *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
+       # switched to ELF, *-*-netbsd* would select the old
+       # object file format.  This provides both forward
+       # compatibility and a consistent mechanism for selecting the
+       # object file format.
+       #
+       # Note: NetBSD doesn't particularly care about the vendor
+       # portion of the name.  We always set it to "unknown".
+       sysctl="sysctl -n hw.machine_arch"
+       UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
+           /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
+       case "${UNAME_MACHINE_ARCH}" in
+           armeb) machine=armeb-unknown ;;
+           arm*) machine=arm-unknown ;;
+           sh3el) machine=shl-unknown ;;
+           sh3eb) machine=sh-unknown ;;
+           *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+       esac
+       # The Operating System including object format, if it has switched
+       # to ELF recently, or will in the future.
+       case "${UNAME_MACHINE_ARCH}" in
+           arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+               eval $set_cc_for_build
+               if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+                       | grep __ELF__ >/dev/null
+               then
+                   # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+                   # Return netbsd for either.  FIX?
+                   os=netbsd
+               else
+                   os=netbsdelf
+               fi
+               ;;
+           *)
+               os=netbsd
+               ;;
+       esac
+       # The OS release
+       # Debian GNU/NetBSD machines have a different userland, and
+       # thus, need a distinct triplet. However, they do not need
+       # kernel version information, so it can be replaced with a
+       # suitable tag, in the style of linux-gnu.
+       case "${UNAME_VERSION}" in
+           Debian*)
+               release='-gnu'
+               ;;
+           *)
+               release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+               ;;
+       esac
+       # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+       # contains redundant information, the shorter form:
+       # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+       echo "${machine}-${os}${release}"
+       exit ;;
+    *:OpenBSD:*:*)
+       UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+       echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
+       exit ;;
+    *:ekkoBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
+       exit ;;
+    *:SolidBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
+       exit ;;
+    macppc:MirBSD:*:*)   # CPU field must be the canonical "powerpc" spelling
+       echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+       exit ;;
+    *:MirBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
+       exit ;;
     alpha:OSF1:*:*)
-       if test $UNAME_RELEASE = "V4.0"; then
+       case $UNAME_RELEASE in
+       *4.0)
                UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
-       fi
+               ;;
+       *5.*)
+               UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+               ;;
+       esac
+       # According to Compaq, /usr/sbin/psrinfo has been available on
+       # OSF/1 and Tru64 systems produced since 1995.  I hope that
+       # covers most systems running today.  This code pipes the CPU
+       # types through head -n 1, so we only detect the type of CPU 0.
+       ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+       case "$ALPHA_CPU_TYPE" in
+           "EV4 (21064)")
+               UNAME_MACHINE="alpha" ;;
+           "EV4.5 (21064)")
+               UNAME_MACHINE="alpha" ;;
+           "LCA4 (21066/21068)")
+               UNAME_MACHINE="alpha" ;;
+           "EV5 (21164)")
+               UNAME_MACHINE="alphaev5" ;;
+           "EV5.6 (21164A)")
+               UNAME_MACHINE="alphaev56" ;;
+           "EV5.6 (21164PC)")
+               UNAME_MACHINE="alphapca56" ;;
+           "EV5.7 (21164PC)")
+               UNAME_MACHINE="alphapca57" ;;
+           "EV6 (21264)")
+               UNAME_MACHINE="alphaev6" ;;
+           "EV6.7 (21264A)")
+               UNAME_MACHINE="alphaev67" ;;
+           "EV6.8CB (21264C)")
+               UNAME_MACHINE="alphaev68" ;;
+           "EV6.8AL (21264B)")
+               UNAME_MACHINE="alphaev68" ;;
+           "EV6.8CX (21264D)")
+               UNAME_MACHINE="alphaev68" ;;
+           "EV6.9A (21264/EV69A)")
+               UNAME_MACHINE="alphaev69" ;;
+           "EV7 (21364)")
+               UNAME_MACHINE="alphaev7" ;;
+           "EV7.9 (21364A)")
+               UNAME_MACHINE="alphaev79" ;;
+       esac
+       # A Pn.n version is a patched version.
        # A Vn.n version is a released version.
        # A Tn.n version is a released field test version.
        # A Xn.n version is an unreleased experimental baselevel.
        # 1.2 uses "1.2" for uname -r.
-       cat <<EOF >dummy.s
-       .globl main
-       .ent main
-main:
-       .frame \$30,0,\$26,0
-       .prologue 0
-       .long 0x47e03d80 # implver $0
-       lda \$2,259
-       .long 0x47e20c21 # amask $2,$1
-       srl \$1,8,\$2
-       sll \$2,2,\$2
-       sll \$0,3,\$0
-       addl \$1,\$0,\$0
-       addl \$2,\$0,\$0
-       ret \$31,(\$26),1
-       .end main
-EOF
-       ${CC-cc} dummy.s -o dummy 2>/dev/null
-       if test "$?" = 0 ; then
-               ./dummy
-               case "$?" in
-                       7)
-                               UNAME_MACHINE="alpha"
-                               ;;
-                       15)
-                               UNAME_MACHINE="alphaev5"
-                               ;;
-                       14)
-                               UNAME_MACHINE="alphaev56"
-                               ;;
-                       10)
-                               UNAME_MACHINE="alphapca56"
-                               ;;
-                       16)
-                               UNAME_MACHINE="alphaev6"
-                               ;;
-               esac
-       fi
-       rm -f dummy.s dummy
-       echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr [[A-Z]] [[a-z]]`
-       exit 0 ;;
+       echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+       exit ;;
+    Alpha\ *:Windows_NT*:*)
+       # How do we know it's Interix rather than the generic POSIX subsystem?
+       # Should we change UNAME_MACHINE based on the output of uname instead
+       # of the specific Alpha model?
+       echo alpha-pc-interix
+       exit ;;
     21064:Windows_NT:50:3)
        echo alpha-dec-winnt3.5
-       exit ;;
+       exit ;;
     Amiga*:UNIX_System_V:4.0:*)
-       echo m68k-cbm-sysv4
-       exit 0;;
-    amiga:NetBSD:*:*)
-      echo m68k-cbm-netbsd${UNAME_RELEASE}
-      exit 0 ;;
-    amiga:OpenBSD:*:*)
-       echo m68k-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    arc64:OpenBSD:*:*)
-       echo mips64el-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    arc:OpenBSD:*:*)
-       echo mipsel-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    hkmips:OpenBSD:*:*)
-       echo mips-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    pmax:OpenBSD:*:*)
-       echo mipsel-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    sgi:OpenBSD:*:*)
-       echo mips-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    wgrisc:OpenBSD:*:*)
-       echo mipsel-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
+       echo m68k-unknown-sysv4
+       exit ;;
+    *:[Aa]miga[Oo][Ss]:*:*)
+       echo ${UNAME_MACHINE}-unknown-amigaos
+       exit ;;
+    *:[Mm]orph[Oo][Ss]:*:*)
+       echo ${UNAME_MACHINE}-unknown-morphos
+       exit ;;
+    *:OS/390:*:*)
+       echo i370-ibm-openedition
+       exit ;;
+    *:z/VM:*:*)
+       echo s390-ibm-zvmoe
+       exit ;;
+    *:OS400:*:*)
+        echo powerpc-ibm-os400
+       exit ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
        echo arm-acorn-riscix${UNAME_RELEASE}
-       exit 0;;
-    arm32:NetBSD:*:*)
-       echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
-       exit ;;
-    SR2?01:HI-UX/MPP:*:*)
+       exit ;;
+    arm:riscos:*:*|arm:RISCOS:*:*)
+       echo arm-unknown-riscos
+       exit ;;
+    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
        echo hppa1.1-hitachi-hiuxmpp
-       exit 0;;
-    Pyramid*:OSx*:*:*|MIS*:OSx*:*:*)
+       exit ;;
+    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
        # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
        if test "`(/bin/universe) 2>/dev/null`" = att ; then
                echo pyramid-pyramid-sysv3
        else
                echo pyramid-pyramid-bsd
        fi
-       exit ;;
-    NILE:*:*:dcosx)
+       exit ;;
+    NILE*:*:*:dcosx)
        echo pyramid-pyramid-svr4
-       exit 0 ;;
+       exit ;;
+    DRS?6000:unix:4.0:6*)
+       echo sparc-icl-nx6
+       exit ;;
+    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+       case `/usr/bin/uname -p` in
+           sparc) echo sparc-icl-nx7; exit ;;
+       esac ;;
+    sun4H:SunOS:5.*:*)
+       echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+       exit ;;
     sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
        echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-       exit ;;
+       exit ;;
     i86pc:SunOS:5.*:*)
        echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-       exit ;;
+       exit ;;
     sun4*:SunOS:6*:*)
        # According to config.sub, this is the proper way to canonicalize
        # SunOS6.  Hard to guess exactly what SunOS6 will be like, but
        # it's likely to be more like Solaris than SunOS4.
        echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-       exit ;;
+       exit ;;
     sun4*:SunOS:*:*)
        case "`/usr/bin/arch -k`" in
            Series*|S4*)
@@ -170,12 +345,12 @@ EOF
        esac
        # Japanese Language versions have a version number like `4.1.3-JL'.
        echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
-       exit ;;
+       exit ;;
     sun3*:SunOS:*:*)
        echo m68k-sun-sunos${UNAME_RELEASE}
-       exit ;;
+       exit ;;
     sun*:*:4.2BSD:*)
-       UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+       UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
        test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
        case "`/bin/arch`" in
            sun3)
@@ -185,52 +360,63 @@ EOF
                echo sparc-sun-sunos${UNAME_RELEASE}
                ;;
        esac
-       exit ;;
+       exit ;;
     aushp:SunOS:*:*)
        echo sparc-auspex-sunos${UNAME_RELEASE}
-       exit 0 ;;
-    atari*:NetBSD:*:*)
-       echo m68k-atari-netbsd${UNAME_RELEASE}
-       exit 0 ;;
-    atari*:OpenBSD:*:*)
-       echo m68k-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    sun3*:NetBSD:*:*)
-       echo m68k-sun-netbsd${UNAME_RELEASE}
-       exit 0 ;;
-    sun3*:OpenBSD:*:*)
-       echo m68k-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    mac68k:NetBSD:*:*)
-       echo m68k-apple-netbsd${UNAME_RELEASE}
-       exit 0 ;;
-    mac68k:OpenBSD:*:*)
-       echo m68k-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    mvme68k:OpenBSD:*:*)
-       echo m68k-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    mvme88k:OpenBSD:*:*)
-       echo m88k-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
+       exit ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+        echo m68k-atari-mint${UNAME_RELEASE}
+       exit ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+       echo m68k-atari-mint${UNAME_RELEASE}
+        exit ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+        echo m68k-atari-mint${UNAME_RELEASE}
+       exit ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+        echo m68k-milan-mint${UNAME_RELEASE}
+        exit ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+        echo m68k-hades-mint${UNAME_RELEASE}
+        exit ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+        echo m68k-unknown-mint${UNAME_RELEASE}
+        exit ;;
+    m68k:machten:*:*)
+       echo m68k-apple-machten${UNAME_RELEASE}
+       exit ;;
     powerpc:machten:*:*)
        echo powerpc-apple-machten${UNAME_RELEASE}
-       exit ;;
+       exit ;;
     RISC*:Mach:*:*)
        echo mips-dec-mach_bsd4.3
-       exit ;;
+       exit ;;
     RISC*:ULTRIX:*:*)
        echo mips-dec-ultrix${UNAME_RELEASE}
-       exit ;;
+       exit ;;
     VAX*:ULTRIX*:*:*)
        echo vax-dec-ultrix${UNAME_RELEASE}
-       exit ;;
-    2020:CLIX:*:*)
+       exit ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
        echo clipper-intergraph-clix${UNAME_RELEASE}
-       exit ;;
+       exit ;;
     mips:*:*:UMIPS | mips:*:*:RISCos)
-       sed 's/^        //' << EOF >dummy.c
-       int main (argc, argv) int argc; char **argv; {
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+       int main (int argc, char *argv[]) {
+#else
+       int main (argc, argv) int argc; char *argv[]; {
+#endif
        #if defined (host_mips) && defined (MIPSEB)
        #if defined (SYSTYPE_SYSV)
          printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
@@ -245,62 +431,83 @@ EOF
          exit (-1);
        }
 EOF
-       ${CC-cc} dummy.c -o dummy \
-         && ./dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
-         && rm dummy.c dummy && exit 0
-       rm -f dummy.c dummy
+       $CC_FOR_BUILD -o $dummy $dummy.c &&
+         dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+         SYSTEM_NAME=`$dummy $dummyarg` &&
+           { echo "$SYSTEM_NAME"; exit; }
        echo mips-mips-riscos${UNAME_RELEASE}
-       exit 0 ;;
+       exit ;;
+    Motorola:PowerMAX_OS:*:*)
+       echo powerpc-motorola-powermax
+       exit ;;
+    Motorola:*:4.3:PL8-*)
+       echo powerpc-harris-powermax
+       exit ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+       echo powerpc-harris-powermax
+       exit ;;
     Night_Hawk:Power_UNIX:*:*)
        echo powerpc-harris-powerunix
-       exit ;;
+       exit ;;
     m88k:CX/UX:7*:*)
        echo m88k-harris-cxux7
-       exit ;;
+       exit ;;
     m88k:*:4*:R4*)
        echo m88k-motorola-sysv4
-       exit ;;
+       exit ;;
     m88k:*:3*:R3*)
        echo m88k-motorola-sysv3
-       exit ;;
+       exit ;;
     AViiON:dgux:*:*)
         # DG/UX returns AViiON for all architectures
         UNAME_PROCESSOR=`/usr/bin/uname -p`
-        if [ $UNAME_PROCESSOR = mc88100 -o $UNAME_PROCESSOR = mc88110 ] ; then
-       if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx \
-            -o ${TARGET_BINARY_INTERFACE}x = x ] ; then
+       if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+       then
+           if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+              [ ${TARGET_BINARY_INTERFACE}x = x ]
+           then
                echo m88k-dg-dgux${UNAME_RELEASE}
-       else
+           else
                echo m88k-dg-dguxbcs${UNAME_RELEASE}
+           fi
+       else
+           echo i586-dg-dgux${UNAME_RELEASE}
        fi
-        else echo i586-dg-dgux${UNAME_RELEASE}
-        fi
-       exit 0 ;;
+       exit ;;
     M88*:DolphinOS:*:*)        # DolphinOS (SVR3)
        echo m88k-dolphin-sysv3
-       exit ;;
+       exit ;;
     M88*:*:R3*:*)
        # Delta 88k system running SVR3
        echo m88k-motorola-sysv3
-       exit ;;
+       exit ;;
     XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
        echo m88k-tektronix-sysv3
-       exit ;;
+       exit ;;
     Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
        echo m68k-tektronix-bsd
-       exit ;;
+       exit ;;
     *:IRIX*:*:*)
        echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
-       exit ;;
+       exit ;;
     ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
-       echo romp-ibm-aix      # uname -m gives an 8 hex-code CPU id
-       exit 0 ;;              # Note that: echo "'`uname -s`'" gives 'AIX '
-    i?86:AIX:*:*)
+       echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
+       exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
        echo i386-ibm-aix
-       exit 0 ;;
+       exit ;;
+    ia64:AIX:*:*)
+       if [ -x /usr/bin/oslevel ] ; then
+               IBM_REV=`/usr/bin/oslevel`
+       else
+               IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+       fi
+       echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+       exit ;;
     *:AIX:2:3)
        if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
-               sed 's/^                //' << EOF >dummy.c
+               eval $set_cc_for_build
+               sed 's/^                //' << EOF >$dummy.c
                #include <sys/systemcfg.h>
 
                main()
@@ -311,17 +518,21 @@ EOF
                        exit(0);
                        }
 EOF
-               ${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0
-               rm -f dummy.c dummy
-               echo rs6000-ibm-aix3.2.5
+               if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+               then
+                       echo "$SYSTEM_NAME"
+               else
+                       echo rs6000-ibm-aix3.2.5
+               fi
        elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
                echo rs6000-ibm-aix3.2.4
        else
                echo rs6000-ibm-aix3.2
        fi
-       exit 0 ;;
-    *:AIX:*:4)
-       if /usr/sbin/lsattr -EHl proc0 | grep POWER >/dev/null 2>&1; then
+       exit ;;
+    *:AIX:*:[45])
+       IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+       if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
                IBM_ARCH=rs6000
        else
                IBM_ARCH=powerpc
@@ -329,43 +540,120 @@ EOF
        if [ -x /usr/bin/oslevel ] ; then
                IBM_REV=`/usr/bin/oslevel`
        else
-               IBM_REV=4.${UNAME_RELEASE}
+               IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
        fi
        echo ${IBM_ARCH}-ibm-aix${IBM_REV}
-       exit ;;
+       exit ;;
     *:AIX:*:*)
        echo rs6000-ibm-aix
-       exit ;;
+       exit ;;
     ibmrt:4.4BSD:*|romp-ibm:BSD:*)
        echo romp-ibm-bsd4.4
-       exit ;;
-    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC NetBSD and
+       exit ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
        echo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to
-       exit 0 ;;                           # report: romp-ibm BSD 4.3
+       exit ;;                             # report: romp-ibm BSD 4.3
     *:BOSX:*:*)
        echo rs6000-bull-bosx
-       exit ;;
+       exit ;;
     DPX/2?00:B.O.S.:*:*)
        echo m68k-bull-sysv3
-       exit ;;
+       exit ;;
     9000/[34]??:4.3bsd:1.*:*)
        echo m68k-hp-bsd
-       exit ;;
+       exit ;;
     hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
        echo m68k-hp-bsd4.4
-       exit 0 ;;
-    9000/[3478]??:HP-UX:*:*)
+       exit ;;
+    9000/[34678]??:HP-UX:*:*)
+       HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
        case "${UNAME_MACHINE}" in
            9000/31? )            HP_ARCH=m68000 ;;
            9000/[34]?? )         HP_ARCH=m68k ;;
-           9000/7?? | 9000/8?[1679] ) HP_ARCH=hppa1.1 ;;
-           9000/8?? )            HP_ARCH=hppa1.0 ;;
+           9000/[678][0-9][0-9])
+               if [ -x /usr/bin/getconf ]; then
+                   sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+                    case "${sc_cpu_version}" in
+                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+                      532)                      # CPU_PA_RISC2_0
+                        case "${sc_kernel_bits}" in
+                          32) HP_ARCH="hppa2.0n" ;;
+                          64) HP_ARCH="hppa2.0w" ;;
+                         '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
+                        esac ;;
+                    esac
+               fi
+               if [ "${HP_ARCH}" = "" ]; then
+                   eval $set_cc_for_build
+                   sed 's/^              //' << EOF >$dummy.c
+
+              #define _HPUX_SOURCE
+              #include <stdlib.h>
+              #include <unistd.h>
+
+              int main ()
+              {
+              #if defined(_SC_KERNEL_BITS)
+                  long bits = sysconf(_SC_KERNEL_BITS);
+              #endif
+                  long cpu  = sysconf (_SC_CPU_VERSION);
+
+                  switch (cpu)
+               {
+               case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+               case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+               case CPU_PA_RISC2_0:
+              #if defined(_SC_KERNEL_BITS)
+                   switch (bits)
+                       {
+                       case 64: puts ("hppa2.0w"); break;
+                       case 32: puts ("hppa2.0n"); break;
+                       default: puts ("hppa2.0"); break;
+                       } break;
+              #else  /* !defined(_SC_KERNEL_BITS) */
+                   puts ("hppa2.0"); break;
+              #endif
+               default: puts ("hppa1.0"); break;
+               }
+                  exit (0);
+              }
+EOF
+                   (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+                   test -z "$HP_ARCH" && HP_ARCH=hppa
+               fi ;;
        esac
-       HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+       if [ ${HP_ARCH} = "hppa2.0w" ]
+       then
+           eval $set_cc_for_build
+
+           # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+           # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+           # generating 64-bit code.  GNU and HP use different nomenclature:
+           #
+           # $ CC_FOR_BUILD=cc ./config.guess
+           # => hppa2.0w-hp-hpux11.23
+           # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+           # => hppa64-hp-hpux11.23
+
+           if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+               grep __LP64__ >/dev/null
+           then
+               HP_ARCH="hppa2.0w"
+           else
+               HP_ARCH="hppa64"
+           fi
+       fi
        echo ${HP_ARCH}-hp-hpux${HPUX_REV}
-       exit 0 ;;
+       exit ;;
+    ia64:HP-UX:*:*)
+       HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+       echo ia64-hp-hpux${HPUX_REV}
+       exit ;;
     3050*:HI-UX:*:*)
-       sed 's/^        //' << EOF >dummy.c
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
        #include <unistd.h>
        int
        main ()
@@ -390,324 +678,467 @@ EOF
          exit (0);
        }
 EOF
-       ${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0
-       rm -f dummy.c dummy
+       $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+               { echo "$SYSTEM_NAME"; exit; }
        echo unknown-hitachi-hiuxwe2
-       exit ;;
+       exit ;;
     9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
        echo hppa1.1-hp-bsd
-       exit ;;
+       exit ;;
     9000/8??:4.3bsd:*:*)
        echo hppa1.0-hp-bsd
-       exit 0 ;;
+       exit ;;
+    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+       echo hppa1.0-hp-mpeix
+       exit ;;
     hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
        echo hppa1.1-hp-osf
-       exit ;;
+       exit ;;
     hp8??:OSF1:*:*)
        echo hppa1.0-hp-osf
-       exit ;;
-    i?86:OSF1:*:*)
+       exit ;;
+    i*86:OSF1:*:*)
        if [ -x /usr/sbin/sysversion ] ; then
            echo ${UNAME_MACHINE}-unknown-osf1mk
        else
            echo ${UNAME_MACHINE}-unknown-osf1
        fi
-       exit ;;
+       exit ;;
     parisc*:Lites*:*:*)
        echo hppa1.1-hp-lites
-       exit ;;
+       exit ;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
        echo c1-convex-bsd
-        exit ;;
+        exit ;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
        if getsysinfo -f scalar_acc
        then echo c32-convex-bsd
        else echo c2-convex-bsd
        fi
-        exit ;;
+        exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
        echo c34-convex-bsd
-        exit ;;
+        exit ;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
        echo c38-convex-bsd
-        exit ;;
+        exit ;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
        echo c4-convex-bsd
-        exit 0 ;;
-    CRAY*X-MP:*:*:*)
-       echo xmp-cray-unicos
-        exit 0 ;;
+        exit ;;
     CRAY*Y-MP:*:*:*)
-       echo ymp-cray-unicos${UNAME_RELEASE}
-       exit ;;
+       echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
     CRAY*[A-Z]90:*:*:*)
        echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
        | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-             -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/
-       exit 0 ;;
+             -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+             -e 's/\.[^.]*$/.X/'
+       exit ;;
     CRAY*TS:*:*:*)
-       echo t90-cray-unicos${UNAME_RELEASE}
-       exit 0 ;;
-    CRAY-2:*:*:*)
-       echo cray2-cray-unicos
-        exit 0 ;;
-    F300:UNIX_System_V:*:*)
-        FUJITSU_SYS=`uname -p | tr [A-Z] [a-z] | sed -e 's/\///'`
+       echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    CRAY*T3E:*:*:*)
+       echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    CRAY*SV1:*:*:*)
+       echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    *:UNICOS/mp:*:*)
+       echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+       FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
         FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit 0 ;;
-    F301:UNIX_System_V:*:*)
-       echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'`
-       exit 0 ;;
-    hp3[0-9][05]:NetBSD:*:*)
-       echo m68k-hp-netbsd${UNAME_RELEASE}
-       exit 0 ;;
-    hp300:OpenBSD:*:*)
-       echo m68k-unknown-openbsd${UNAME_RELEASE}
-       exit 0 ;;
-    i?86:BSD/386:*:* | *:BSD/OS:*:*)
+        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+        exit ;;
+    5000:UNIX_System_V:4.*:*)
+        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+       exit ;;
+    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
        echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
-       exit 0 ;;
+       exit ;;
+    sparc*:BSD/OS:*:*)
+       echo sparc-unknown-bsdi${UNAME_RELEASE}
+       exit ;;
+    *:BSD/OS:*:*)
+       echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+       exit ;;
     *:FreeBSD:*:*)
-       echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
-       exit 0 ;;
-    *:NetBSD:*:*)
-       echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
-       exit 0 ;;
-    *:OpenBSD:*:*)
-       echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
-       exit 0 ;;
+       case ${UNAME_MACHINE} in
+           pc98)
+               echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+           *)
+               echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+       esac
+       exit ;;
     i*:CYGWIN*:*)
-       echo i386-pc-cygwin32
-       exit ;;
+       echo ${UNAME_MACHINE}-pc-cygwin
+       exit ;;
     i*:MINGW*:*)
-       echo i386-pc-mingw32
-       exit 0 ;;
+       echo ${UNAME_MACHINE}-pc-mingw32
+       exit ;;
+    i*:windows32*:*)
+       # uname -m includes "-pc" on this system.
+       echo ${UNAME_MACHINE}-mingw32
+       exit ;;
+    i*:PW*:*)
+       echo ${UNAME_MACHINE}-pc-pw32
+       exit ;;
+    x86:Interix*:[345]*)
+       echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//'
+       exit ;;
+    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+       echo i${UNAME_MACHINE}-pc-mks
+       exit ;;
+    i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+       # How do we know it's Interix rather than the generic POSIX subsystem?
+       # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+       # UNAME_MACHINE based on the output of uname instead of i386?
+       echo i586-pc-interix
+       exit ;;
+    i*:UWIN*:*)
+       echo ${UNAME_MACHINE}-pc-uwin
+       exit ;;
+    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+       echo x86_64-unknown-cygwin
+       exit ;;
     p*:CYGWIN*:*)
-       echo powerpcle-unknown-cygwin32
-       exit ;;
+       echo powerpcle-unknown-cygwin
+       exit ;;
     prep*:SunOS:5.*:*)
        echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-       exit ;;
+       exit ;;
     *:GNU:*:*)
+       # the GNU system
        echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
-       exit 0 ;;
-    *:Linux:*:*)
+       exit ;;
+    *:GNU/*:*:*)
+       # other systems with GNU libc and userland
+       echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
+       exit ;;
+    i*86:Minix:*:*)
+       echo ${UNAME_MACHINE}-pc-minix
+       exit ;;
+    arm*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    cris:Linux:*:*)
+       echo cris-axis-linux-gnu
+       exit ;;
+    crisv32:Linux:*:*)
+       echo crisv32-axis-linux-gnu
+       exit ;;
+    frv:Linux:*:*)
+       echo frv-unknown-linux-gnu
+       exit ;;
+    ia64:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    m32r*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    m68*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    mips:Linux:*:*)
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+       #undef CPU
+       #undef mips
+       #undef mipsel
+       #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+       CPU=mipsel
+       #else
+       #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+       CPU=mips
+       #else
+       CPU=
+       #endif
+       #endif
+EOF
+       eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+           /^CPU/{
+               s: ::g
+               p
+           }'`"
+       test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+       ;;
+    mips64:Linux:*:*)
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+       #undef CPU
+       #undef mips64
+       #undef mips64el
+       #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+       CPU=mips64el
+       #else
+       #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+       CPU=mips64
+       #else
+       CPU=
+       #endif
+       #endif
+EOF
+       eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+           /^CPU/{
+               s: ::g
+               p
+           }'`"
+       test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+       ;;
+    or32:Linux:*:*)
+       echo or32-unknown-linux-gnu
+       exit ;;
+    ppc:Linux:*:*)
+       echo powerpc-unknown-linux-gnu
+       exit ;;
+    ppc64:Linux:*:*)
+       echo powerpc64-unknown-linux-gnu
+       exit ;;
+    alpha:Linux:*:*)
+       case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+         EV5)   UNAME_MACHINE=alphaev5 ;;
+         EV56)  UNAME_MACHINE=alphaev56 ;;
+         PCA56) UNAME_MACHINE=alphapca56 ;;
+         PCA57) UNAME_MACHINE=alphapca56 ;;
+         EV6)   UNAME_MACHINE=alphaev6 ;;
+         EV67)  UNAME_MACHINE=alphaev67 ;;
+         EV68*) UNAME_MACHINE=alphaev68 ;;
+        esac
+       objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
+       if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+       echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+       exit ;;
+    parisc:Linux:*:* | hppa:Linux:*:*)
+       # Look for CPU level
+       case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+         PA7*) echo hppa1.1-unknown-linux-gnu ;;
+         PA8*) echo hppa2.0-unknown-linux-gnu ;;
+         *)    echo hppa-unknown-linux-gnu ;;
+       esac
+       exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+       echo hppa64-unknown-linux-gnu
+       exit ;;
+    s390:Linux:*:* | s390x:Linux:*:*)
+       echo ${UNAME_MACHINE}-ibm-linux
+       exit ;;
+    sh64*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    sh*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    sparc:Linux:*:* | sparc64:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    vax:Linux:*:*)
+       echo ${UNAME_MACHINE}-dec-linux-gnu
+       exit ;;
+    x86_64:Linux:*:*)
+       echo x86_64-unknown-linux-gnu
+       exit ;;
+    i*86:Linux:*:*)
        # The BFD linker knows what the default object file format is, so
-       # first see if it will tell us.
-       ld_help_string=`ld --help 2>&1`
-       ld_supported_emulations=`echo $ld_help_string \
-                        | sed -ne '/supported emulations:/!d
+       # first see if it will tell us. cd to the root directory to prevent
+       # problems with other programs or directories called `ld' in the path.
+       # Set LC_ALL=C to ensure ld outputs messages in English.
+       ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
+                        | sed -ne '/supported targets:/!d
                                    s/[         ][      ]*/ /g
-                                   s/.*supported emulations: *//
+                                   s/.*supported targets: *//
                                    s/ .*//
                                    p'`
-        case "$ld_supported_emulations" in
-         i?86linux)  echo "${UNAME_MACHINE}-pc-linux-gnuaout"      ; exit 0 ;;
-         i?86coff)   echo "${UNAME_MACHINE}-pc-linux-gnucoff"      ; exit 0 ;;
-         sparclinux) echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;;
-         m68klinux)  echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;;
-         elf32ppc)   echo "powerpc-unknown-linux-gnu"              ; exit 0 ;;
+        case "$ld_supported_targets" in
+         elf32-i386)
+               TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
+               ;;
+         a.out-i386-linux)
+               echo "${UNAME_MACHINE}-pc-linux-gnuaout"
+               exit ;;
+         coff-i386)
+               echo "${UNAME_MACHINE}-pc-linux-gnucoff"
+               exit ;;
+         "")
+               # Either a pre-BFD a.out linker (linux-gnuoldld) or
+               # one that does not give us useful --help.
+               echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
+               exit ;;
        esac
-
-       if test "${UNAME_MACHINE}" = "alpha" ; then
-               sed 's/^        //'  <<EOF >dummy.s
-               .globl main
-               .ent main
-       main:
-               .frame \$30,0,\$26,0
-               .prologue 0
-               .long 0x47e03d80 # implver $0
-               lda \$2,259
-               .long 0x47e20c21 # amask $2,$1
-               srl \$1,8,\$2
-               sll \$2,2,\$2
-               sll \$0,3,\$0
-               addl \$1,\$0,\$0
-               addl \$2,\$0,\$0
-               ret \$31,(\$26),1
-               .end main
-EOF
-               LIBC=""
-               ${CC-cc} dummy.s -o dummy 2>/dev/null
-               if test "$?" = 0 ; then
-                       ./dummy
-                       case "$?" in
-                       7)
-                               UNAME_MACHINE="alpha"
-                               ;;
-                       15)
-                               UNAME_MACHINE="alphaev5"
-                               ;;
-                       14)
-                               UNAME_MACHINE="alphaev56"
-                               ;;
-                       10)
-                               UNAME_MACHINE="alphapca56"
-                               ;;
-                       16)
-                               UNAME_MACHINE="alphaev6"
-                               ;;
-                       esac    
-
-                       objdump --private-headers dummy | \
-                         grep ld.so.1 > /dev/null
-                       if test "$?" = 0 ; then
-                               LIBC="libc1"
-                       fi
-               fi      
-               rm -f dummy.s dummy
-               echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0
-       elif test "${UNAME_MACHINE}" = "mips" ; then
-         cat >dummy.c <<EOF
-main(argc, argv)
-     int argc;
-     char *argv[];
-{
-#ifdef __MIPSEB__
-  printf ("%s-unknown-linux-gnu\n", argv[1]);
-#endif
-#ifdef __MIPSEL__
-  printf ("%sel-unknown-linux-gnu\n", argv[1]);
-#endif
-  return 0;
-}
-EOF
-         ${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0
-         rm -f dummy.c dummy
-       else
-         # Either a pre-BFD a.out linker (linux-gnuoldld)
-         # or one that does not give us useful --help.
-         # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout.
-         # If ld does not provide *any* "supported emulations:"
-         # that means it is gnuoldld.
-         echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:"
-         test $? != 0 && echo "${UNAME_MACHINE}-pc-linux-gnuoldld" && exit 0
-
-         case "${UNAME_MACHINE}" in
-         i?86)
-           VENDOR=pc;
-           ;;
-         *)
-           VENDOR=unknown;
-           ;;
-         esac
-         # Determine whether the default compiler is a.out or elf
-         cat >dummy.c <<EOF
-#include <features.h>
-main(argc, argv)
-     int argc;
-     char *argv[];
-{
-#ifdef __ELF__
-# ifdef __GLIBC__
-#  if __GLIBC__ >= 2
-    printf ("%s-${VENDOR}-linux-gnu\n", argv[1]);
-#  else
-    printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
-#  endif
-# else
-   printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
-# endif
-#else
-  printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]);
-#endif
-  return 0;
-}
+       # Determine whether the default compiler is a.out or elf
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+       #include <features.h>
+       #ifdef __ELF__
+       # ifdef __GLIBC__
+       #  if __GLIBC__ >= 2
+       LIBC=gnu
+       #  else
+       LIBC=gnulibc1
+       #  endif
+       # else
+       LIBC=gnulibc1
+       # endif
+       #else
+       #if defined(__INTEL_COMPILER) || defined(__PGI)
+       LIBC=gnu
+       #else
+       LIBC=gnuaout
+       #endif
+       #endif
+       #ifdef __dietlibc__
+       LIBC=dietlibc
+       #endif
 EOF
-         ${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0
-         rm -f dummy.c dummy
-       fi ;;
-# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.  earlier versions
-# are messed up and put the nodename in both sysname and nodename.
-    i?86:DYNIX/ptx:4*:*)
+       eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+           /^LIBC/{
+               s: ::g
+               p
+           }'`"
+       test x"${LIBC}" != x && {
+               echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
+               exit
+       }
+       test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
+       ;;
+    i*86:DYNIX/ptx:4*:*)
+       # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+       # earlier versions are messed up and put the nodename in both
+       # sysname and nodename.
        echo i386-sequent-sysv4
-       exit ;;
-    i?86:UNIX_SV:4.2MP:2.*)
+       exit ;;
+    i*86:UNIX_SV:4.2MP:2.*)
         # Unixware is an offshoot of SVR4, but it has its own version
         # number series starting with 2...
         # I am not positive that other SVR4 systems won't match this,
        # I just have to hope.  -- rms.
         # Use sysv4.2uw... so that sysv4* matches it.
        echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
-       exit 0 ;;
-    i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*)
+       exit ;;
+    i*86:OS/2:*:*)
+       # If we were able to find `uname', then EMX Unix compatibility
+       # is probably installed.
+       echo ${UNAME_MACHINE}-pc-os2-emx
+       exit ;;
+    i*86:XTS-300:*:STOP)
+       echo ${UNAME_MACHINE}-unknown-stop
+       exit ;;
+    i*86:atheos:*:*)
+       echo ${UNAME_MACHINE}-unknown-atheos
+       exit ;;
+    i*86:syllable:*:*)
+       echo ${UNAME_MACHINE}-pc-syllable
+       exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+       echo i386-unknown-lynxos${UNAME_RELEASE}
+       exit ;;
+    i*86:*DOS:*:*)
+       echo ${UNAME_MACHINE}-pc-msdosdjgpp
+       exit ;;
+    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+       UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
        if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
-               echo ${UNAME_MACHINE}-univel-sysv${UNAME_RELEASE}
+               echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
        else
-               echo ${UNAME_MACHINE}-pc-sysv${UNAME_RELEASE}
+               echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
        fi
-       exit 0 ;;
-    i?86:*:3.2:*)
+       exit ;;
+    i*86:*:5:[678]*)
+       # UnixWare 7.x, OpenUNIX and OpenServer 6.
+       case `/bin/uname -X | grep "^Machine"` in
+           *486*)           UNAME_MACHINE=i486 ;;
+           *Pentium)        UNAME_MACHINE=i586 ;;
+           *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+       esac
+       echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+       exit ;;
+    i*86:*:3.2:*)
        if test -f /usr/options/cb.name; then
                UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
                echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
        elif /bin/uname -X 2>/dev/null >/dev/null ; then
-               UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')`
-               (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486
-               (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \
+               UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+               (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+               (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
                        && UNAME_MACHINE=i586
+               (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+                       && UNAME_MACHINE=i686
+               (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+                       && UNAME_MACHINE=i686
                echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
        else
                echo ${UNAME_MACHINE}-pc-sysv32
        fi
-       exit ;;
+       exit ;;
     pc:*:*:*)
+       # Left here for compatibility:
         # uname -m prints for DJGPP always 'pc', but it prints nothing about
         # the processor, so we play safe by assuming i386.
        echo i386-pc-msdosdjgpp
-        exit ;;
+        exit ;;
     Intel:Mach:3*:*)
        echo i386-pc-mach3
-       exit ;;
+       exit ;;
     paragon:*:*:*)
        echo i860-intel-osf1
-       exit ;;
+       exit ;;
     i860:*:4.*:*) # i860-SVR4
        if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
          echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
        else # Add other i860-SVR4 vendors below as they are discovered.
          echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4
        fi
-       exit ;;
+       exit ;;
     mini*:CTIX:SYS*5:*)
        # "miniframe"
        echo m68010-convergent-sysv
-       exit 0 ;;
-    M68*:*:R3V[567]*:*)
-       test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
-    3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0)
+       exit ;;
+    mc68k:UNIX:SYSTEM5:3.51m)
+       echo m68k-convergent-sysv
+       exit ;;
+    M680?0:D-NIX:5.3:*)
+       echo m68k-diab-dnix
+       exit ;;
+    M68*:*:R3V[5678]*:*)
+       test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
        OS_REL=''
        test -r /etc/.relid \
        && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-         && echo i486-ncr-sysv4.3${OS_REL} && exit 0
+         && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
        /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-         && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
+         && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
         /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && echo i486-ncr-sysv4 && exit 0 ;;
-    m68*:LynxOS:2.*:*)
+          && { echo i486-ncr-sysv4; exit; } ;;
+    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
        echo m68k-unknown-lynxos${UNAME_RELEASE}
-       exit ;;
+       exit ;;
     mc68030:UNIX_System_V:4.*:*)
        echo m68k-atari-sysv4
-       exit 0 ;;
-    i?86:LynxOS:2.*:*)
-       echo i386-unknown-lynxos${UNAME_RELEASE}
-       exit 0 ;;
+       exit ;;
     TSUNAMI:LynxOS:2.*:*)
        echo sparc-unknown-lynxos${UNAME_RELEASE}
-       exit ;;
-    rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*)
+       exit ;;
+    rs6000:LynxOS:2.*:*)
        echo rs6000-unknown-lynxos${UNAME_RELEASE}
-       exit 0 ;;
+       exit ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+       echo powerpc-unknown-lynxos${UNAME_RELEASE}
+       exit ;;
     SM[BE]S:UNIX_SV:*:*)
        echo mips-dde-sysv${UNAME_RELEASE}
-       exit 0 ;;
+       exit ;;
+    RM*:ReliantUNIX-*:*:*)
+       echo mips-sni-sysv4
+       exit ;;
     RM*:SINIX-*:*:*)
        echo mips-sni-sysv4
-       exit ;;
+       exit ;;
     *:SINIX-*:*:*)
        if uname -p 2>/dev/null >/dev/null ; then
                UNAME_MACHINE=`(uname -p) 2>/dev/null`
@@ -715,39 +1146,156 @@ EOF
        else
                echo ns32k-sni-sysv
        fi
-       exit ;;
-    PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                           # says <Richard.M.Bartel@ccMail.Census.GOV>
+       exit ;;
+    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+                      # says <Richard.M.Bartel@ccMail.Census.GOV>
         echo i586-unisys-sysv4
-        exit ;;
+        exit ;;
     *:UNIX_System_V:4*:FTX*)
        # From Gerald Hewes <hewes@openmarket.com>.
        # How about differentiating between stratus architectures? -djm
        echo hppa1.1-stratus-sysv4
-       exit ;;
+       exit ;;
     *:*:*:FTX*)
        # From seanf@swdc.stratus.com.
        echo i860-stratus-sysv4
-       exit 0 ;;
+       exit ;;
+    i*86:VOS:*:*)
+       # From Paul.Green@stratus.com.
+       echo ${UNAME_MACHINE}-stratus-vos
+       exit ;;
+    *:VOS:*:*)
+       # From Paul.Green@stratus.com.
+       echo hppa1.1-stratus-vos
+       exit ;;
     mc68*:A/UX:*:*)
        echo m68k-apple-aux${UNAME_RELEASE}
-       exit ;;
-    news*:NEWS-OS:*:6*)
+       exit ;;
+    news*:NEWS-OS:6*:*)
        echo mips-sony-newsos6
-       exit ;;
-    R3000:*System_V*:*:* | R4000:UNIX_SYSV:*:*)
+       exit ;;
+    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
        if [ -d /usr/nec ]; then
                echo mips-nec-sysv${UNAME_RELEASE}
        else
                echo mips-unknown-sysv${UNAME_RELEASE}
        fi
-        exit 0 ;;
+        exit ;;
+    BeBox:BeOS:*:*)    # BeOS running on hardware made by Be, PPC only.
+       echo powerpc-be-beos
+       exit ;;
+    BeMac:BeOS:*:*)    # BeOS running on Mac or Mac clone, PPC only.
+       echo powerpc-apple-beos
+       exit ;;
+    BePC:BeOS:*:*)     # BeOS running on Intel PC compatible.
+       echo i586-pc-beos
+       exit ;;
+    SX-4:SUPER-UX:*:*)
+       echo sx4-nec-superux${UNAME_RELEASE}
+       exit ;;
+    SX-5:SUPER-UX:*:*)
+       echo sx5-nec-superux${UNAME_RELEASE}
+       exit ;;
+    SX-6:SUPER-UX:*:*)
+       echo sx6-nec-superux${UNAME_RELEASE}
+       exit ;;
+    Power*:Rhapsody:*:*)
+       echo powerpc-apple-rhapsody${UNAME_RELEASE}
+       exit ;;
+    *:Rhapsody:*:*)
+       echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+       exit ;;
+    *:Darwin:*:*)
+       UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+       case $UNAME_PROCESSOR in
+           unknown) UNAME_PROCESSOR=powerpc ;;
+       esac
+       echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+       exit ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+       UNAME_PROCESSOR=`uname -p`
+       if test "$UNAME_PROCESSOR" = "x86"; then
+               UNAME_PROCESSOR=i386
+               UNAME_MACHINE=pc
+       fi
+       echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+       exit ;;
+    *:QNX:*:4*)
+       echo i386-pc-qnx
+       exit ;;
+    NSE-?:NONSTOP_KERNEL:*:*)
+       echo nse-tandem-nsk${UNAME_RELEASE}
+       exit ;;
+    NSR-?:NONSTOP_KERNEL:*:*)
+       echo nsr-tandem-nsk${UNAME_RELEASE}
+       exit ;;
+    *:NonStop-UX:*:*)
+       echo mips-compaq-nonstopux
+       exit ;;
+    BS2000:POSIX*:*:*)
+       echo bs2000-siemens-sysv
+       exit ;;
+    DS/*:UNIX_System_V:*:*)
+       echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+       exit ;;
+    *:Plan9:*:*)
+       # "uname -m" is not consistent, so use $cputype instead. 386
+       # is converted to i386 for consistency with other x86
+       # operating systems.
+       if test "$cputype" = "386"; then
+           UNAME_MACHINE=i386
+       else
+           UNAME_MACHINE="$cputype"
+       fi
+       echo ${UNAME_MACHINE}-unknown-plan9
+       exit ;;
+    *:TOPS-10:*:*)
+       echo pdp10-unknown-tops10
+       exit ;;
+    *:TENEX:*:*)
+       echo pdp10-unknown-tenex
+       exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+       echo pdp10-dec-tops20
+       exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+       echo pdp10-xkl-tops20
+       exit ;;
+    *:TOPS-20:*:*)
+       echo pdp10-unknown-tops20
+       exit ;;
+    *:ITS:*:*)
+       echo pdp10-unknown-its
+       exit ;;
+    SEI:*:*:SEIUX)
+        echo mips-sei-seiux${UNAME_RELEASE}
+       exit ;;
+    *:DragonFly:*:*)
+       echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+       exit ;;
+    *:*VMS:*:*)
+       UNAME_MACHINE=`(uname -p) 2>/dev/null`
+       case "${UNAME_MACHINE}" in
+           A*) echo alpha-dec-vms ; exit ;;
+           I*) echo ia64-dec-vms ; exit ;;
+           V*) echo vax-dec-vms ; exit ;;
+       esac ;;
+    *:XENIX:*:SysV)
+       echo i386-pc-xenix
+       exit ;;
+    i*86:skyos:*:*)
+       echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+       exit ;;
+    i*86:rdos:*:*)
+       echo ${UNAME_MACHINE}-pc-rdos
+       exit ;;
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2
 #echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
 
-cat >dummy.c <<EOF
+eval $set_cc_for_build
+cat >$dummy.c <<EOF
 #ifdef _SEQUENT_
 # include <sys/types.h>
 # include <sys/utsname.h>
@@ -772,7 +1320,7 @@ main ()
 #endif
 
 #if defined (__arm) && defined (__acorn) && defined (__unix)
-  printf ("arm-acorn-riscix"); exit (0);
+  printf ("arm-acorn-riscix\n"); exit (0);
 #endif
 
 #if defined (hp300) && !defined (hpux)
@@ -785,7 +1333,10 @@ main ()
 #endif
   int version;
   version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
-  printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+  if (version < 4)
+    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+  else
+    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
   exit (0);
 #endif
 
@@ -830,11 +1381,24 @@ main ()
 #endif
 
 #if defined (vax)
-#if !defined (ultrix)
-  printf ("vax-dec-bsd\n"); exit (0);
-#else
-  printf ("vax-dec-ultrix\n"); exit (0);
-#endif
+# if !defined (ultrix)
+#  include <sys/param.h>
+#  if defined (BSD)
+#   if BSD == 43
+      printf ("vax-dec-bsd4.3\n"); exit (0);
+#   else
+#    if BSD == 199006
+      printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#    else
+      printf ("vax-dec-bsd\n"); exit (0);
+#    endif
+#   endif
+#  else
+    printf ("vax-dec-bsd\n"); exit (0);
+#  endif
+# else
+    printf ("vax-dec-ultrix\n"); exit (0);
+# endif
 #endif
 
 #if defined (alliant) && defined (i860)
@@ -845,12 +1409,12 @@ main ()
 }
 EOF
 
-${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy && rm dummy.c dummy && exit 0
-rm -f dummy.c dummy
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+       { echo "$SYSTEM_NAME"; exit; }
 
 # Apollos put the system type in the environment.
 
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
 
 # Convex versions that predate uname can use getsysinfo(1)
 
@@ -859,25 +1423,69 @@ then
     case `getsysinfo -f cpu_type` in
     c1*)
        echo c1-convex-bsd
-       exit ;;
+       exit ;;
     c2*)
        if getsysinfo -f scalar_acc
        then echo c32-convex-bsd
        else echo c2-convex-bsd
        fi
-       exit ;;
+       exit ;;
     c34*)
        echo c34-convex-bsd
-       exit ;;
+       exit ;;
     c38*)
        echo c38-convex-bsd
-       exit ;;
+       exit ;;
     c4*)
        echo c4-convex-bsd
-       exit ;;
+       exit ;;
     esac
 fi
 
-#echo '(Unable to guess system type)' 1>&2
+cat >&2 <<EOF
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+  http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.guess
+and
+  http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.sub
+
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM  = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
+EOF
 
 exit 1
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
old mode 100755 (executable)
new mode 100644 (file)
index 213a6d4..a4e8a94
@@ -1,6 +1,10 @@
 #! /bin/sh
-# Configuration validation subroutine script, version 1.1.
-#   Copyright (C) 1991, 92, 93, 94, 95, 96, 1997 Free Software Foundation, Inc.
+# Configuration validation subroutine script.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+
+timestamp='2006-01-02'
+
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
 # can handle that machine.  It does not imply ALL GNU software can.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
 # configuration script generated by Autoconf, you may include it under
 # the same distribution terms that you use for the rest of that program.
 
+
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted ChangeLog entry.
+#
 # Configuration subroutine to validate and canonicalize a configuration type.
 # Supply the specified configuration type as an argument.
 # If it is invalid, we print an error message on stderr and exit with code 1.
 #      CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
 # It is wrong to echo any other type of specification.
 
-if [ x$1 = x ]
-then
-       echo Configuration name missing. 1>&2
-       echo "Usage: $0 CPU-MFR-OPSYS" 1>&2
-       echo "or     $0 ALIAS" 1>&2
-       echo where ALIAS is a recognized configuration type. 1>&2
-       exit 1
-fi
+me=`echo "$0" | sed -e 's,.*/,,'`
 
-# First pass through any local machine types.
-case $1 in
-       *local*)
-               echo $1
-               exit 0
-               ;;
-       *)
-       ;;
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+       $0 [OPTION] ALIAS
+
+Canonicalize a configuration name.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line: consume options; stop at the first non-option, left in $1.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )        # Use stdin as input.
+       break ;;
+    -* )       # Unknown option: diagnose on stderr so callers capturing stdout never see it.
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo $1
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in
+ 0) echo "$me: missing argument$help" >&2   # nothing left after option parsing
+    exit 1;;
+ 1) ;;                                      # exactly one argument remains: fall through
+ *) echo "$me: too many arguments$help" >&2 # more than one argument remains
+    exit 1;;
 esac
 
 # Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
 # Here we must recognize all the valid KERNEL-OS combinations.
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
-  linux-gnu*)
+  nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
+  uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
     ;;
@@ -94,15 +147,37 @@ case $os in
        -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
        -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
        -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-       -apple)
+       -apple | -axis | -knuth | -cray)
                os=
                basic_machine=$1
                ;;
+       -sim | -cisco | -oki | -wec | -winbond)
+               os=
+               basic_machine=$1
+               ;;
+       -scout)
+               ;;
+       -wrs)
+               os=-vxworks
+               basic_machine=$1
+               ;;
+       -chorusos*)
+               os=-chorusos
+               basic_machine=$1
+               ;;
+       -chorusrdb)
+               os=-chorusrdb
+               basic_machine=$1
+               ;;
        -hiux*)
                os=-hiuxwe2
                ;;
+       -sco6)
+               os=-sco5v6
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
        -sco5)
-               os=sco3.2v5
+               os=-sco3.2v5
                basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
                ;;
        -sco4)
@@ -117,10 +192,17 @@ case $os in
                # Don't forget version if it is 3.2v4 or newer.
                basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
                ;;
+       -sco5v6*)
+               # Don't forget version if it is 3.2v4 or newer.
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
        -sco*)
                os=-sco3.2v2
                basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
                ;;
+       -udk*)
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
        -isc)
                os=-isc2.2
                basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
@@ -143,26 +225,84 @@ case $os in
        -psos*)
                os=-psos
                ;;
+       -mint | -mint[0-9]*)
+               basic_machine=m68k-atari
+               os=-mint
+               ;;
 esac
 
 # Decode aliases for certain CPU-COMPANY combinations.
 case $basic_machine in
        # Recognize the basic CPU types without company name.
        # Some are omitted here because they have special meanings below.
-       tahoe | i860 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \
-               | arme[lb] | pyramid | mn10200 | mn10300 \
-               | tron | a29k | 580 | i960 | h8300 | hppa | hppa1.0 | hppa1.1 \
-               | alpha | alphaev5 | alphaev56 | we32k | ns16k | clipper \
-               | i370 | sh | powerpc | powerpcle | 1750a | dsp16xx | pdp11 \
-               | mips64 | mipsel | mips64el | mips64orion | mips64orionel \
-               | mipstx39 | mipstx39el \
-               | sparc | sparclet | sparclite | sparc64 | v850)
+       1750a | 580 \
+       | a29k \
+       | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+       | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+       | am33_2.0 \
+       | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \
+       | bfin \
+       | c4x | clipper \
+       | d10v | d30v | dlx | dsp16xx \
+       | fr30 | frv \
+       | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+       | i370 | i860 | i960 | ia64 \
+       | ip2k | iq2000 \
+       | m32r | m32rle | m68000 | m68k | m88k | maxq | mb | microblaze | mcore \
+       | mips | mipsbe | mipseb | mipsel | mipsle \
+       | mips16 \
+       | mips64 | mips64el \
+       | mips64vr | mips64vrel \
+       | mips64orion | mips64orionel \
+       | mips64vr4100 | mips64vr4100el \
+       | mips64vr4300 | mips64vr4300el \
+       | mips64vr5000 | mips64vr5000el \
+       | mips64vr5900 | mips64vr5900el \
+       | mipsisa32 | mipsisa32el \
+       | mipsisa32r2 | mipsisa32r2el \
+       | mipsisa64 | mipsisa64el \
+       | mipsisa64r2 | mipsisa64r2el \
+       | mipsisa64sb1 | mipsisa64sb1el \
+       | mipsisa64sr71k | mipsisa64sr71kel \
+       | mipstx39 | mipstx39el \
+       | mn10200 | mn10300 \
+       | mt \
+       | msp430 \
+       | ns16k | ns32k \
+       | or32 \
+       | pdp10 | pdp11 | pj | pjl \
+       | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+       | pyramid \
+       | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
+       | sh64 | sh64le \
+       | sparc | sparc64 | sparc64b | sparc86x | sparclet | sparclite \
+       | sparcv8 | sparcv9 | sparcv9b \
+       | strongarm \
+       | tahoe | thumb | tic4x | tic80 | tron \
+       | v850 | v850e \
+       | we32k \
+       | x86 | xscale | xscalee[bl] | xstormy16 | xtensa \
+       | z8k)
+               basic_machine=$basic_machine-unknown
+               ;;
+       m32c)
                basic_machine=$basic_machine-unknown
                ;;
+       m6811 | m68hc11 | m6812 | m68hc12)
+               # Motorola 68HC11/12.
+               basic_machine=$basic_machine-unknown
+               os=-none
+               ;;
+       m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+               ;;
+       ms1)
+               basic_machine=mt-unknown
+               ;;
+
        # We use `pc' rather than `unknown'
        # because (1) that's what they normally are, and
        # (2) the word "unknown" tends to confuse beginning users.
-       i[3456]86)
+       i*86 | x86_64)
          basic_machine=$basic_machine-pc
          ;;
        # Object if more than one company name word.
@@ -171,27 +311,91 @@ case $basic_machine in
                exit 1
                ;;
        # Recognize the basic CPU types with company name.
-       vax-* | tahoe-* | i[3456]86-* | i860-* | m32r-* | m68k-* | m68000-* \
-             | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \
-             | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \
-             | power-* | none-* | 580-* | cray2-* | h8300-* | i960-* \
-             | xmp-* | ymp-* | hppa-* | hppa1.0-* | hppa1.1-* \
-             | alpha-* | alphaev5-* | alphaev56-* | we32k-* | cydra-* \
-             | ns16k-* | pn-* | np1-* | xps100-* | clipper-* | orion-* \
-             | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \
-             | sparc64-* | mips64-* | mipsel-* \
-             | mips64el-* | mips64orion-* | mips64orionel-*  \
-             | mipstx39-* | mipstx39el-* \
-             | f301-*)
+       580-* \
+       | a29k-* \
+       | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+       | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+       | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
+       | arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+       | avr-* \
+       | bfin-* | bs2000-* \
+       | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
+       | clipper-* | craynv-* | cydra-* \
+       | d10v-* | d30v-* | dlx-* \
+       | elxsi-* \
+       | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \
+       | h8300-* | h8500-* \
+       | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+       | i*86-* | i860-* | i960-* | ia64-* \
+       | ip2k-* | iq2000-* \
+       | m32r-* | m32rle-* \
+       | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+       | m88110-* | m88k-* | maxq-* | mcore-* \
+       | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+       | mips16-* \
+       | mips64-* | mips64el-* \
+       | mips64vr-* | mips64vrel-* \
+       | mips64orion-* | mips64orionel-* \
+       | mips64vr4100-* | mips64vr4100el-* \
+       | mips64vr4300-* | mips64vr4300el-* \
+       | mips64vr5000-* | mips64vr5000el-* \
+       | mips64vr5900-* | mips64vr5900el-* \
+       | mipsisa32-* | mipsisa32el-* \
+       | mipsisa32r2-* | mipsisa32r2el-* \
+       | mipsisa64-* | mipsisa64el-* \
+       | mipsisa64r2-* | mipsisa64r2el-* \
+       | mipsisa64sb1-* | mipsisa64sb1el-* \
+       | mipsisa64sr71k-* | mipsisa64sr71kel-* \
+       | mipstx39-* | mipstx39el-* \
+       | mmix-* \
+       | mt-* \
+       | msp430-* \
+       | none-* | np1-* | ns16k-* | ns32k-* \
+       | orion-* \
+       | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+       | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+       | pyramid-* \
+       | romp-* | rs6000-* \
+       | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | shbe-* \
+       | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+       | sparc-* | sparc64-* | sparc64b-* | sparc86x-* | sparclet-* \
+       | sparclite-* \
+       | sparcv8-* | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
+       | tahoe-* | thumb-* \
+       | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+       | tron-* \
+       | v850-* | v850e-* | vax-* \
+       | we32k-* \
+       | x86-* | x86_64-* | xps100-* | xscale-* | xscalee[bl]-* \
+       | xstormy16-* | xtensa-* \
+       | ymp-* \
+       | z8k-*)
+               ;;
+       m32c-*)
                ;;
        # Recognize the various machine names and aliases which stand
        # for a CPU type and a company and sometimes even an OS.
+       386bsd)
+               basic_machine=i386-unknown
+               os=-bsd
+               ;;
        3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
                basic_machine=m68000-att
                ;;
        3b*)
                basic_machine=we32k-att
                ;;
+       a29khif)
+               basic_machine=a29k-amd
+               os=-udi
+               ;;
+       abacus)
+               basic_machine=abacus-unknown
+               ;;
+       adobe68k)
+               basic_machine=m68010-adobe
+               os=-scout
+               ;;
        alliant | fx80)
                basic_machine=fx80-alliant
                ;;
@@ -202,25 +406,35 @@ case $basic_machine in
                basic_machine=a29k-none
                os=-bsd
                ;;
+       amd64)
+               basic_machine=x86_64-pc
+               ;;
+       amd64-*)
+               basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
        amdahl)
                basic_machine=580-amdahl
                os=-sysv
                ;;
        amiga | amiga-*)
-               basic_machine=m68k-cbm
+               basic_machine=m68k-unknown
                ;;
        amigaos | amigados)
-               basic_machine=m68k-cbm
+               basic_machine=m68k-unknown
                os=-amigaos
                ;;
        amigaunix | amix)
-               basic_machine=m68k-cbm
+               basic_machine=m68k-unknown
                os=-sysv4
                ;;
        apollo68)
                basic_machine=m68k-apollo
                os=-sysv
                ;;
+       apollo68bsd)
+               basic_machine=m68k-apollo
+               os=-bsd
+               ;;
        aux)
                basic_machine=m68k-apple
                os=-aux
@@ -229,6 +443,10 @@ case $basic_machine in
                basic_machine=ns32k-sequent
                os=-dynix
                ;;
+       c90)
+               basic_machine=c90-cray
+               os=-unicos
+               ;;
        convex-c1)
                basic_machine=c1-convex
                os=-bsd
@@ -249,27 +467,45 @@ case $basic_machine in
                basic_machine=c38-convex
                os=-bsd
                ;;
-       cray | ymp)
-               basic_machine=ymp-cray
+       cray | j90)
+               basic_machine=j90-cray
                os=-unicos
                ;;
-       cray2)
-               basic_machine=cray2-cray
-               os=-unicos
+       craynv)
+               basic_machine=craynv-cray
+               os=-unicosmp
                ;;
-       [ctj]90-cray)
-               basic_machine=c90-cray
-               os=-unicos
+       cr16c)
+               basic_machine=cr16c-unknown
+               os=-elf
                ;;
        crds | unos)
                basic_machine=m68k-crds
                ;;
+       crisv32 | crisv32-* | etraxfs*)
+               basic_machine=crisv32-axis
+               ;;
+       cris | cris-* | etrax*)
+               basic_machine=cris-axis
+               ;;
+       crx)
+               basic_machine=crx-unknown
+               os=-elf
+               ;;
        da30 | da30-*)
                basic_machine=m68k-da30
                ;;
        decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
                basic_machine=mips-dec
                ;;
+       decsystem10* | dec10*)
+               basic_machine=pdp10-dec
+               os=-tops10
+               ;;
+       decsystem20* | dec20*)
+               basic_machine=pdp10-dec
+               os=-tops20
+               ;;
        delta | 3300 | motorola-3300 | motorola-delta \
              | 3300-motorola | delta-motorola)
                basic_machine=m68k-motorola
@@ -278,6 +514,10 @@ case $basic_machine in
                basic_machine=m88k-motorola
                os=-sysv3
                ;;
+       djgpp)
+               basic_machine=i586-pc
+               os=-msdosdjgpp
+               ;;
        dpx20 | dpx20-*)
                basic_machine=rs6000-bull
                os=-bosx
@@ -297,6 +537,10 @@ case $basic_machine in
        encore | umax | mmax)
                basic_machine=ns32k-encore
                ;;
+       es1800 | OSE68k | ose68k | ose | OSE)
+               basic_machine=m68k-ericsson
+               os=-ose
+               ;;
        fx2800)
                basic_machine=i860-alliant
                ;;
@@ -307,6 +551,10 @@ case $basic_machine in
                basic_machine=tron-gmicro
                os=-sysv
                ;;
+       go32)
+               basic_machine=i386-pc
+               os=-go32
+               ;;
        h3050r* | hiux*)
                basic_machine=hppa1.1-hitachi
                os=-hiuxwe2
@@ -315,6 +563,14 @@ case $basic_machine in
                basic_machine=h8300-hitachi
                os=-hms
                ;;
+       h8300xray)
+               basic_machine=h8300-hitachi
+               os=-xray
+               ;;
+       h8500hms)
+               basic_machine=h8500-hitachi
+               os=-hms
+               ;;
        harris)
                basic_machine=m88k-harris
                os=-sysv3
@@ -330,13 +586,30 @@ case $basic_machine in
                basic_machine=m68k-hp
                os=-hpux
                ;;
+       hp3k9[0-9][0-9] | hp9[0-9][0-9])
+               basic_machine=hppa1.0-hp
+               ;;
        hp9k2[0-9][0-9] | hp9k31[0-9])
                basic_machine=m68000-hp
                ;;
        hp9k3[2-9][0-9])
                basic_machine=m68k-hp
                ;;
-       hp9k7[0-9][0-9] | hp7[0-9][0-9] | hp9k8[0-9]7 | hp8[0-9]7)
+       hp9k6[0-9][0-9] | hp6[0-9][0-9])
+               basic_machine=hppa1.0-hp
+               ;;
+       hp9k7[0-79][0-9] | hp7[0-79][0-9])
+               basic_machine=hppa1.1-hp
+               ;;
+       hp9k78[0-9] | hp78[0-9])
+               # FIXME: really hppa2.0-hp
+               basic_machine=hppa1.1-hp
+               ;;
+       hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+               # FIXME: really hppa2.0-hp
+               basic_machine=hppa1.1-hp
+               ;;
+       hp9k8[0-9][13679] | hp8[0-9][13679])
                basic_machine=hppa1.1-hp
                ;;
        hp9k8[0-9][0-9] | hp8[0-9][0-9])
@@ -345,27 +618,42 @@ case $basic_machine in
        hppa-next)
                os=-nextstep3
                ;;
+       hppaosf)
+               basic_machine=hppa1.1-hp
+               os=-osf
+               ;;
+       hppro)
+               basic_machine=hppa1.1-hp
+               os=-proelf
+               ;;
        i370-ibm* | ibm*)
                basic_machine=i370-ibm
-               os=-mvs
                ;;
 # I'm not sure what "Sysv32" means.  Should this be sysv3.2?
-       i[3456]86v32)
+       i*86v32)
                basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
                os=-sysv32
                ;;
-       i[3456]86v4*)
+       i*86v4*)
                basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
                os=-sysv4
                ;;
-       i[3456]86v)
+       i*86v)
                basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
                os=-sysv
                ;;
-       i[3456]86sol2)
+       i*86sol2)
                basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
                os=-solaris2
                ;;
+       i386mach)
+               basic_machine=i386-mach
+               os=-mach
+               ;;
+       i386-vsta | vsta)
+               basic_machine=i386-unknown
+               os=-vsta
+               ;;
        iris | iris4d)
                basic_machine=mips-sgi
                case $os in
@@ -391,16 +679,16 @@ case $basic_machine in
                basic_machine=ns32k-utek
                os=-sysv
                ;;
+       mingw32)
+               basic_machine=i386-pc
+               os=-mingw32
+               ;;
        miniframe)
                basic_machine=m68000-convergent
                ;;
-       mipsel*-linux*)
-               basic_machine=mipsel-unknown
-               os=-linux-gnu
-               ;;
-       mips*-linux*)
-               basic_machine=mips-unknown
-               os=-linux-gnu
+       *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+               basic_machine=m68k-atari
+               os=-mint
                ;;
        mips3*-*)
                basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
@@ -408,10 +696,37 @@ case $basic_machine in
        mips3*)
                basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
                ;;
+       monitor)
+               basic_machine=m68k-rom68k
+               os=-coff
+               ;;
+       morphos)
+               basic_machine=powerpc-unknown
+               os=-morphos
+               ;;
+       msdos)
+               basic_machine=i386-pc
+               os=-msdos
+               ;;
+       ms1-*)
+               basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
+               ;;
+       mvs)
+               basic_machine=i370-ibm
+               os=-mvs
+               ;;
        ncr3000)
                basic_machine=i486-ncr
                os=-sysv4
                ;;
+       netbsd386)
+               basic_machine=i386-unknown
+               os=-netbsd
+               ;;
+       netwinder)
+               basic_machine=armv4l-rebel
+               os=-linux
+               ;;
        news | news700 | news800 | news900)
                basic_machine=m68k-sony
                os=-newsos
@@ -424,6 +739,10 @@ case $basic_machine in
                basic_machine=mips-sony
                os=-newsos
                ;;
+       necv70)
+               basic_machine=v70-nec
+               os=-sysv
+               ;;
        next | m*-next )
                basic_machine=m68k-next
                case $os in
@@ -449,9 +768,39 @@ case $basic_machine in
                basic_machine=i960-intel
                os=-nindy
                ;;
+       mon960)
+               basic_machine=i960-intel
+               os=-mon960
+               ;;
+       nonstopux)
+               basic_machine=mips-compaq
+               os=-nonstopux
+               ;;
        np1)
                basic_machine=np1-gould
                ;;
+       nsr-tandem)
+               basic_machine=nsr-tandem
+               ;;
+       op50n-* | op60c-*)
+               basic_machine=hppa1.1-oki
+               os=-proelf
+               ;;
+       openrisc | openrisc-*)
+               basic_machine=or32-unknown
+               ;;
+       os400)
+               basic_machine=powerpc-ibm
+               os=-os400
+               ;;
+       OSE68000 | ose68000)
+               basic_machine=m68000-ericsson
+               os=-ose
+               ;;
+       os68k)
+               basic_machine=m68k-none
+               os=-os68k
+               ;;
        pa-hitachi)
                basic_machine=hppa1.1-hitachi
                os=-hiuxwe2
@@ -466,53 +815,105 @@ case $basic_machine in
        pbb)
                basic_machine=m68k-tti
                ;;
-        pc532 | pc532-*)
+       pc532 | pc532-*)
                basic_machine=ns32k-pc532
                ;;
-       pentium | p5)
-               basic_machine=i586-intel
+       pc98)
+               basic_machine=i386-pc
                ;;
-       pentiumpro | p6)
-               basic_machine=i686-intel
+       pc98-*)
+               basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
-       pentium-* | p5-*)
+       pentium | p5 | k5 | k6 | nexgen | viac3)
+               basic_machine=i586-pc
+               ;;
+       pentiumpro | p6 | 6x86 | athlon | athlon_*)
+               basic_machine=i686-pc
+               ;;
+       pentiumii | pentium2 | pentiumiii | pentium3)
+               basic_machine=i686-pc
+               ;;
+       pentium4)
+               basic_machine=i786-pc
+               ;;
+       pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
                basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
-       pentiumpro-* | p6-*)
+       pentiumpro-* | p6-* | 6x86-* | athlon-*)
                basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
-       k5)
-               # We don't have specific support for AMD's K5 yet, so just call it a Pentium
-               basic_machine=i586-amd
+       pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+               basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
-       nexen)
-               # We don't have specific support for Nexgen yet, so just call it a Pentium
-               basic_machine=i586-nexgen
+       pentium4-*)
+               basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
        pn)
                basic_machine=pn-gould
                ;;
-       power)  basic_machine=rs6000-ibm
+       power)  basic_machine=power-ibm
                ;;
        ppc)    basic_machine=powerpc-unknown
-               ;;
+               ;;
        ppc-*)  basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
        ppcle | powerpclittle | ppc-le | powerpc-little)
                basic_machine=powerpcle-unknown
-               ;;
+               ;;
        ppcle-* | powerpclittle-*)
                basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
                ;;
+       ppc64)  basic_machine=powerpc64-unknown
+               ;;
+       ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+               basic_machine=powerpc64le-unknown
+               ;;
+       ppc64le-* | powerpc64little-*)
+               basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
        ps2)
                basic_machine=i386-ibm
                ;;
+       pw32)
+               basic_machine=i586-unknown
+               os=-pw32
+               ;;
+       rdos)
+               basic_machine=i386-pc
+               os=-rdos
+               ;;
+       rom68k)
+               basic_machine=m68k-rom68k
+               os=-coff
+               ;;
        rm[46]00)
                basic_machine=mips-siemens
                ;;
        rtpc | rtpc-*)
                basic_machine=romp-ibm
                ;;
+       s390 | s390-*)
+               basic_machine=s390-ibm
+               ;;
+       s390x | s390x-*)
+               basic_machine=s390x-ibm
+               ;;
+       sa29200)
+               basic_machine=a29k-amd
+               os=-udi
+               ;;
+       sb1)
+               basic_machine=mipsisa64sb1-unknown
+               ;;
+       sb1el)
+               basic_machine=mipsisa64sb1el-unknown
+               ;;
+       sei)
+               basic_machine=mips-sei
+               os=-seiux
+               ;;
        sequent)
                basic_machine=i386-sequent
                ;;
@@ -520,6 +921,13 @@ case $basic_machine in
                basic_machine=sh-hitachi
                os=-hms
                ;;
+       sh64)
+               basic_machine=sh64-unknown
+               ;;
+       sparclite-wrs | simso-wrs)
+               basic_machine=sparclite-wrs
+               os=-vxworks
+               ;;
        sps7)
                basic_machine=m68k-bull
                os=-sysv2
@@ -527,6 +935,13 @@ case $basic_machine in
        spur)
                basic_machine=spur-unknown
                ;;
+       st2000)
+               basic_machine=m68k-tandem
+               ;;
+       stratus)
+               basic_machine=i860-stratus
+               os=-sysv4
+               ;;
        sun2)
                basic_machine=m68000-sun
                ;;
@@ -567,19 +982,51 @@ case $basic_machine in
        sun386 | sun386i | roadrunner)
                basic_machine=i386-sun
                ;;
+       sv1)
+               basic_machine=sv1-cray
+               os=-unicos
+               ;;
        symmetry)
                basic_machine=i386-sequent
                os=-dynix
                ;;
+       t3e)
+               basic_machine=alphaev5-cray
+               os=-unicos
+               ;;
+       t90)
+               basic_machine=t90-cray
+               os=-unicos
+               ;;
+       tic54x | c54x*)
+               basic_machine=tic54x-unknown
+               os=-coff
+               ;;
+       tic55x | c55x*)
+               basic_machine=tic55x-unknown
+               os=-coff
+               ;;
+       tic6x | c6x*)
+               basic_machine=tic6x-unknown
+               os=-coff
+               ;;
        tx39)
                basic_machine=mipstx39-unknown
                ;;
        tx39el)
                basic_machine=mipstx39el-unknown
                ;;
+       toad1)
+               basic_machine=pdp10-xkl
+               os=-tops20
+               ;;
        tower | tower-32)
                basic_machine=m68k-ncr
                ;;
+       tpf)
+               basic_machine=s390x-ibm
+               os=-tpf
+               ;;
        udi29k)
                basic_machine=a29k-amd
                os=-udi
@@ -588,6 +1035,10 @@ case $basic_machine in
                basic_machine=a29k-nyu
                os=-sym1
                ;;
+       v810 | necv810)
+               basic_machine=v810-nec
+               os=-none
+               ;;
        vaxv)
                basic_machine=vax-dec
                os=-sysv
@@ -597,8 +1048,8 @@ case $basic_machine in
                os=-vms
                ;;
        vpp*|vx|vx-*)
-               basic_machine=f301-fujitsu
-               ;;
+               basic_machine=f301-fujitsu
+               ;;
        vxworks960)
                basic_machine=i960-wrs
                os=-vxworks
@@ -611,13 +1062,29 @@ case $basic_machine in
                basic_machine=a29k-wrs
                os=-vxworks
                ;;
-       xmp)
-               basic_machine=xmp-cray
-               os=-unicos
+       w65*)
+               basic_machine=w65-wdc
+               os=-none
+               ;;
+       w89k-*)
+               basic_machine=hppa1.1-winbond
+               os=-proelf
                ;;
-        xps | xps100)
+       xbox)
+               basic_machine=i686-pc
+               os=-mingw32
+               ;;
+       xps | xps100)
                basic_machine=xps100-honeywell
                ;;
+       ymp)
+               basic_machine=ymp-cray
+               os=-unicos
+               ;;
+       z8k-*-coff)
+               basic_machine=z8k-unknown
+               os=-sim
+               ;;
        none)
                basic_machine=none-none
                os=-none
@@ -625,32 +1092,44 @@ case $basic_machine in
 
 # Here we handle the default manufacturer of certain CPU types.  It is in
 # some cases the only manufacturer, in others, it is the most popular.
-       mips)
-               if [ x$os = x-linux-gnu ]; then
-                       basic_machine=mips-unknown
-               else
-                       basic_machine=mips-mips
-               fi
+       w89k)
+               basic_machine=hppa1.1-winbond
+               ;;
+       op50n)
+               basic_machine=hppa1.1-oki
+               ;;
+       op60c)
+               basic_machine=hppa1.1-oki
                ;;
        romp)
                basic_machine=romp-ibm
                ;;
+       mmix)
+               basic_machine=mmix-knuth
+               ;;
        rs6000)
                basic_machine=rs6000-ibm
                ;;
        vax)
                basic_machine=vax-dec
                ;;
+       pdp10)
+               # there are many clones, so DEC is not a safe bet
+               basic_machine=pdp10-unknown
+               ;;
        pdp11)
                basic_machine=pdp11-dec
                ;;
        we32k)
                basic_machine=we32k-att
                ;;
-       sparc)
+       sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele)
+               basic_machine=sh-unknown
+               ;;
+       sparc | sparcv8 | sparcv9 | sparcv9b)
                basic_machine=sparc-sun
                ;;
-        cydra)
+       cydra)
                basic_machine=cydra-cydrome
                ;;
        orion)
@@ -659,6 +1138,15 @@ case $basic_machine in
        orion105)
                basic_machine=clipper-highlevel
                ;;
+       mac | mpw | mac-mpw)
+               basic_machine=m68k-apple
+               ;;
+       pmac | pmac-mpw)
+               basic_machine=powerpc-apple
+               ;;
+       *-unknown)
+               # Make sure to match an already-canonicalized machine name.
+               ;;
        *)
                echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
                exit 1
@@ -711,14 +1199,49 @@ case $os in
              | -aos* \
              | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
              | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
-             | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
-             | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* \
+             | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
+             | -openbsd* | -solidbsd* \
+             | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+             | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
              | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
              | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
-             | -cygwin32* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-             | -mingw32* | -linux-gnu* | -uxpv*)
+             | -chorusos* | -chorusrdb* \
+             | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+             | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
+             | -uxpv* | -beos* | -mpeix* | -udk* \
+             | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
+             | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+             | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+             | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+             | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
+             | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+             | -skyos* | -haiku* | -rdos*)
        # Remember, each alternative MUST END IN *, to match a version number.
                ;;
+       -qnx*)
+               case $basic_machine in
+                   x86-* | i*86-*)
+                       ;;
+                   *)
+                       os=-nto$os
+                       ;;
+               esac
+               ;;
+       -nto-qnx*)
+               ;;
+       -nto*)
+               os=`echo $os | sed -e 's|nto|nto-qnx|'`
+               ;;
+       -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+             | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
+             | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+               ;;
+       -mac*)
+               os=`echo $os | sed -e 's|mac|macos|'`
+               ;;
+       -linux-dietlibc)
+               os=-linux-dietlibc
+               ;;
        -linux*)
                os=`echo $os | sed -e 's|linux|linux-gnu|'`
                ;;
@@ -728,6 +1251,15 @@ case $os in
        -sunos6*)
                os=`echo $os | sed -e 's|sunos6|solaris3|'`
                ;;
+       -opened*)
+               os=-openedition
+               ;;
+        -os400*)
+               os=-os400
+               ;;
+       -wince*)
+               os=-wince
+               ;;
        -osfrose*)
                os=-osfrose
                ;;
@@ -743,11 +1275,26 @@ case $os in
        -acis*)
                os=-aos
                ;;
+       -atheos*)
+               os=-atheos
+               ;;
+       -syllable*)
+               os=-syllable
+               ;;
+       -386bsd)
+               os=-bsd
+               ;;
        -ctix* | -uts*)
                os=-sysv
                ;;
+       -nova*)
+               os=-rtmk-nova
+               ;;
        -ns2 )
-               os=-nextstep2
+               os=-nextstep2
+               ;;
+       -nsk*)
+               os=-nsk
                ;;
        # Preserve the version number of sinix5.
        -sinix5.*)
@@ -756,6 +1303,9 @@ case $os in
        -sinix*)
                os=-sysv4
                ;;
+        -tpf*)
+               os=-tpf
+               ;;
        -triton*)
                os=-sysv3
                ;;
@@ -774,9 +1324,27 @@ case $os in
        # This must come after -sysvr4.
        -sysv*)
                ;;
+       -ose*)
+               os=-ose
+               ;;
+       -es1800*)
+               os=-ose
+               ;;
        -xenix)
                os=-xenix
                ;;
+       -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+               os=-mint
+               ;;
+       -aros*)
+               os=-aros
+               ;;
+       -kaos*)
+               os=-kaos
+               ;;
+       -zvmoe)
+               os=-zvmoe
+               ;;
        -none)
                ;;
        *)
@@ -802,10 +1370,20 @@ case $basic_machine in
        *-acorn)
                os=-riscix1.2
                ;;
+       arm*-rebel)
+               os=-linux
+               ;;
        arm*-semi)
                os=-aout
                ;;
-        pdp11-*)
+    c4x-* | tic4x-*)
+        os=-coff
+        ;;
+       # This must come before the *-dec entry.
+       pdp10-*)
+               os=-tops20
+               ;;
+       pdp11-*)
                os=-none
                ;;
        *-dec | vax-*)
@@ -823,15 +1401,45 @@ case $basic_machine in
                # default.
                # os=-sunos4
                ;;
+       m68*-cisco)
+               os=-aout
+               ;;
+       mips*-cisco)
+               os=-elf
+               ;;
+       mips*-*)
+               os=-elf
+               ;;
+       or32-*)
+               os=-coff
+               ;;
        *-tti)  # must be before sparc entry or we get the wrong os.
                os=-sysv3
                ;;
        sparc-* | *-sun)
                os=-sunos4.1.1
                ;;
+       *-be)
+               os=-beos
+               ;;
+       *-haiku)
+               os=-haiku
+               ;;
        *-ibm)
                os=-aix
                ;;
+       *-knuth)
+               os=-mmixware
+               ;;
+       *-wec)
+               os=-proelf
+               ;;
+       *-winbond)
+               os=-proelf
+               ;;
+       *-oki)
+               os=-proelf
+               ;;
        *-hp)
                os=-hpux
                ;;
@@ -874,27 +1482,39 @@ case $basic_machine in
        *-next)
                os=-nextstep3
                ;;
-        *-gould)
+       *-gould)
                os=-sysv
                ;;
-        *-highlevel)
+       *-highlevel)
                os=-bsd
                ;;
        *-encore)
                os=-bsd
                ;;
-        *-sgi)
+       *-sgi)
                os=-irix
                ;;
-        *-siemens)
+       *-siemens)
                os=-sysv4
                ;;
        *-masscomp)
                os=-rtu
                ;;
-       f301-fujitsu)
+       f30[01]-fujitsu | f700-fujitsu)
                os=-uxpv
                ;;
+       *-rom68k)
+               os=-coff
+               ;;
+       *-*bug)
+               os=-coff
+               ;;
+       *-apple)
+               os=-macos
+               ;;
+       *-atari*)
+               os=-mint
+               ;;
        *)
                os=-none
                ;;
@@ -916,9 +1536,15 @@ case $basic_machine in
                        -aix*)
                                vendor=ibm
                                ;;
+                       -beos*)
+                               vendor=be
+                               ;;
                        -hpux*)
                                vendor=hp
                                ;;
+                       -mpeix*)
+                               vendor=hp
+                               ;;
                        -hiux*)
                                vendor=hitachi
                                ;;
@@ -934,21 +1560,47 @@ case $basic_machine in
                        -genix*)
                                vendor=ns
                                ;;
-                       -mvs*)
+                       -mvs* | -opened*)
+                               vendor=ibm
+                               ;;
+                       -os400*)
                                vendor=ibm
                                ;;
                        -ptx*)
                                vendor=sequent
                                ;;
-                       -vxsim* | -vxworks*)
+                       -tpf*)
+                               vendor=ibm
+                               ;;
+                       -vxsim* | -vxworks* | -windiss*)
                                vendor=wrs
                                ;;
                        -aux*)
                                vendor=apple
                                ;;
+                       -hms*)
+                               vendor=hitachi
+                               ;;
+                       -mpw* | -macos*)
+                               vendor=apple
+                               ;;
+                       -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+                               vendor=atari
+                               ;;
+                       -vos*)
+                               vendor=stratus
+                               ;;
                esac
                basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
                ;;
 esac
 
 echo $basic_machine$os
+exit
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/config.ver b/config.ver
new file mode 100644 (file)
index 0000000..5b6b6c8
--- /dev/null
@@ -0,0 +1,44 @@
+# Shared-library version numbers for the JPEG library.  Reads $host_os and $version_type, neither of which is set in this file.
+JPEG_VER_MAJOR=62
+JPEG_VER_MINOR=1
+JPEG_REVISION=0
+# Platform-specific overrides of the defaults above, keyed on $host_os.
+case $host_os in
+  cygwin*)
+    # The shared library built from this source code is *not* binary
+    # compatible with Cygwin's official binary release (cygjpeg-62.dll),
+    # because the official binary has been built with the lossless
+    # jpeg patch, which is available as ljpeg-6b.tar.gz.
+    # Therefore we decided to give the shared library a version number
+    # other than 62.
+    #
+    JPEG_VER_MAJOR=162
+    JPEG_VER_MINOR=0
+    ;;
+  freebsd*)
+    # This follows the official binary release in the ports collection.
+    JPEG_VER_MAJOR=9
+    ;;
+esac
+
+# Convert the absolute version numbers to libtool CURRENT/REVISION/AGE values.
+case $version_type in
+  freebsd-aout|freebsd-elf|sunos)
+    JPEG_LT_CURRENT=$JPEG_VER_MAJOR
+    JPEG_LT_REVISION=$JPEG_VER_MINOR
+    JPEG_LT_AGE=0
+    ;;
+  irix|nonstopux)
+    JPEG_LT_CURRENT=`expr $JPEG_VER_MAJOR + $JPEG_VER_MINOR - 1`
+    JPEG_LT_AGE=$JPEG_VER_MINOR
+    JPEG_LT_REVISION=$JPEG_VER_MINOR
+    ;;
+  *)
+    JPEG_LT_CURRENT=`expr $JPEG_VER_MAJOR + $JPEG_VER_MINOR`
+    JPEG_LT_AGE=$JPEG_VER_MINOR
+    JPEG_LT_REVISION=$JPEG_REVISION
+    ;;
+esac
+# Combine the three values as CURRENT:REVISION:AGE.
+JPEG_LIB_VERSION=$JPEG_LT_CURRENT:$JPEG_LT_REVISION:$JPEG_LT_AGE
+
index 35c9db5cadcf2b0b7e98ce8ad51a29ec55616fd0..9c368e5e3736465dfead664fb78522765e89dbb7 100755 (executable)
--- a/configure
+++ b/configure
@@ -1,7 +1,7 @@
 #! /bin/sh
 
 # Guess values for system-dependent variables and create Makefiles.
-# Generated automatically using autoconf version 2.12 
+# Generated automatically using autoconf version 2.13 
 # Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc.
 #
 # This configure script is free software; the Free Software Foundation
@@ -12,13 +12,190 @@ ac_help=
 ac_default_prefix=/usr/local
 # Any additions from configure.in:
 ac_help="$ac_help
-  --enable-shared         build shared library using GNU libtool"
+  --enable-shared[=PKGS]  build shared libraries [default=no]"
 ac_help="$ac_help
-  --enable-static         build static library using GNU libtool"
+  --enable-static[=PKGS]  build static libraries [default=no]"
+ac_help="$ac_help
+  --enable-fast-install[=PKGS]  optimize for fast installation [default=yes]"
+ac_help="$ac_help
+  --with-gnu-ld           assume the C compiler uses GNU ld [default=no]"
+
+# Find the correct PATH separator.  Usually this is `:', but
+# DJGPP uses `;' like DOS.
+if test "X${PATH_SEPARATOR+set}" != Xset; then
+  UNAME=${UNAME-`uname 2>/dev/null`}
+  case X$UNAME in
+    *-DOS) lt_cv_sys_path_separator=';' ;;
+    *)     lt_cv_sys_path_separator=':' ;;
+  esac
+  PATH_SEPARATOR=$lt_cv_sys_path_separator
+fi
+
+
+# Check that we are running under the correct shell.
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+case X$ECHO in
+X*--fallback-echo)
+  # Remove one level of quotation (which was required for Make).
+  ECHO=`echo "$ECHO" | sed 's,\\\\\$\\$0,'$0','`
+  ;;
+esac
+
+echo=${ECHO-echo}
+if test "X$1" = X--no-reexec; then
+  # Discard the --no-reexec flag, and continue.
+  shift
+elif test "X$1" = X--fallback-echo; then
+  # Avoid inline document here, it may be left over
+  :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+  # Yippee, $echo works!
+  :
+else
+  # Restart under the correct shell.
+  exec $SHELL "$0" --no-reexec ${1+"$@"}
+fi
+
+if test "X$1" = X--fallback-echo; then
+  # used as fallback echo
+  shift
+  cat <<EOF
+
+EOF
+  exit 0
+fi
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+if test -z "$ECHO"; then
+if test "X${echo_test_string+set}" != Xset; then
+# find a string as large as possible, as long as the shell can cope with it
+  for cmd in 'sed 50q "$0"' 'sed 20q "$0"' 'sed 10q "$0"' 'sed 2q "$0"' 'echo test'; do
+    # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ...
+    if (echo_test_string="`eval $cmd`") 2>/dev/null &&
+       echo_test_string="`eval $cmd`" &&
+       (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null
+    then
+      break
+    fi
+  done
+fi
+
+if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+   echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+   test "X$echo_testing_string" = "X$echo_test_string"; then
+  :
+else
+  # The Solaris, AIX, and Digital Unix default echo programs unquote
+  # backslashes.  This makes it impossible to quote backslashes using
+  #   echo "$something" | sed 's/\\/\\\\/g'
+  #
+  # So, first we look for a working echo in the user's PATH.
+
+  IFS="${IFS=  }"; save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for dir in $PATH /usr/ucb; do
+    if (test -f $dir/echo || test -f $dir/echo$ac_exeext) &&
+       test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' &&
+       echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` &&
+       test "X$echo_testing_string" = "X$echo_test_string"; then
+      echo="$dir/echo"
+      break
+    fi
+  done
+  IFS="$save_ifs"
+
+  if test "X$echo" = Xecho; then
+    # We didn't find a better echo, so look for alternatives.
+    if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' &&
+       echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` &&
+       test "X$echo_testing_string" = "X$echo_test_string"; then
+      # This shell has a builtin print -r that does the trick.
+      echo='print -r'
+    elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) &&
+        test "X$CONFIG_SHELL" != X/bin/ksh; then
+      # If we have ksh, try running configure again with it.
+      ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh}
+      export ORIGINAL_CONFIG_SHELL
+      CONFIG_SHELL=/bin/ksh
+      export CONFIG_SHELL
+      exec $CONFIG_SHELL "$0" --no-reexec ${1+"$@"}
+    else
+      # Try using printf.
+      echo='printf %s\n'
+      if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+        echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+        test "X$echo_testing_string" = "X$echo_test_string"; then
+       # Cool, printf works
+       :
+      elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "$0" --fallback-echo '\t') 2>/dev/null` &&
+          test "X$echo_testing_string" = 'X\t' &&
+          echo_testing_string=`($ORIGINAL_CONFIG_SHELL "$0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+          test "X$echo_testing_string" = "X$echo_test_string"; then
+       CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL
+       export CONFIG_SHELL
+       SHELL="$CONFIG_SHELL"
+       export SHELL
+       echo="$CONFIG_SHELL $0 --fallback-echo"
+      elif echo_testing_string=`($CONFIG_SHELL "$0" --fallback-echo '\t') 2>/dev/null` &&
+          test "X$echo_testing_string" = 'X\t' &&
+          echo_testing_string=`($CONFIG_SHELL "$0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+          test "X$echo_testing_string" = "X$echo_test_string"; then
+       echo="$CONFIG_SHELL $0 --fallback-echo"
+      else
+       # maybe with a smaller string...
+       prev=:
+
+       for cmd in 'echo test' 'sed 2q "$0"' 'sed 10q "$0"' 'sed 20q "$0"' 'sed 50q "$0"'; do
+         if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null
+         then
+           break
+         fi
+         prev="$cmd"
+       done
+
+       if test "$prev" != 'sed 50q "$0"'; then
+         echo_test_string=`eval $prev`
+         export echo_test_string
+         exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "$0" ${1+"$@"}
+       else
+         # Oops.  We lost completely, so just stick with echo.
+         echo=echo
+       fi
+      fi
+    fi
+  fi
+fi
+fi
+
+# Copy echo and quote the copy suitably for passing to libtool from
+# the Makefile, instead of quoting the original, which is used later.
+ECHO=$echo
+if test "X$ECHO" = "X$CONFIG_SHELL $0 --fallback-echo"; then
+   ECHO="$CONFIG_SHELL \\\$\$0 --fallback-echo"
+fi
+
+
+ac_help="$ac_help
+  --disable-libtool-lock  avoid locking (might break parallel builds)"
+ac_help="$ac_help
+  --with-pic              try to use only PIC/non-PIC objects [default=use both]"
 ac_help="$ac_help
   --enable-maxmem[=N]     enable use of temp files, set max mem usage to N MB"
 ac_help="$ac_help
 "
+ac_help="$ac_help
+  --disable-mmx           do not use MMX instruction set"
+ac_help="$ac_help
+  --disable-3dnow         do not use 3DNow! instruction set"
+ac_help="$ac_help
+  --disable-sse           do not use SSE instruction set"
+ac_help="$ac_help
+  --disable-sse2          do not use SSE2 instruction set"
+ac_help="$ac_help
+  --enable-uchar-boolean  define type \"boolean\" as unsigned char (for Windows)"
 
 # Initialize some variables set by options.
 # The variables have the same names as the options, with
@@ -57,6 +234,7 @@ mandir='${prefix}/man'
 # Initialize some other variables.
 subdirs=
 MFLAGS= MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
 # Maximum number of lines to put in a shell here document.
 ac_max_here_lines=12
 
@@ -340,7 +518,7 @@ EOF
     verbose=yes ;;
 
   -version | --version | --versio | --versi | --vers)
-    echo "configure generated by autoconf version 2.12"
+    echo "configure generated by autoconf version 2.13"
     exit 0 ;;
 
   -with-* | --with-*)
@@ -386,17 +564,6 @@ EOF
   -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; }
     ;;
 
-  *=*)
-    varname=`echo "$ac_option"|sed -e 's/=.*//'`
-    # Reject names that aren't valid shell variable names.
-    if test -n "`echo $varname| sed 's/[a-zA-Z0-9_]//g'`"; then
-      { echo "configure: error: $varname: invalid shell variable name" 1>&2; exit 1; }
-    fi
-    val="`echo "$ac_option"|sed 's/[^=]*=//'`"
-    test -n "$verbose" && echo "       setting shell variable $varname to $val"
-    eval "$varname='$val'"
-    eval "export $varname" ;;
-
   *)
     if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then
       echo "configure: warning: $ac_option: invalid host type" 1>&2
@@ -509,14 +676,23 @@ for ac_site_file in $CONFIG_SITE; do
   fi
 done
 
+if test -r "$cache_file"; then
+  echo "loading cache $cache_file"
+  . $cache_file
+else
+  echo "creating cache $cache_file"
+  > $cache_file
+fi
 
 ac_ext=c
 # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
-ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
 cross_compiling=$ac_cv_prog_cc_cross
 
+ac_exeext=
+ac_objext=o
 if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
   # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu.
   if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
@@ -534,15 +710,16 @@ fi
 # Extract the first word of "gcc", so it can be a program name with args.
 set dummy gcc; ac_word=$2
 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:538: checking for $ac_word" >&5
+echo "configure:714: checking for $ac_word" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   if test -n "$CC"; then
   ac_cv_prog_CC="$CC" # Let the user override the test.
 else
-  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-  for ac_dir in $PATH; do
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
     test -z "$ac_dir" && ac_dir=.
     if test -f $ac_dir/$ac_word; then
       ac_cv_prog_CC="gcc"
@@ -563,16 +740,17 @@ if test -z "$CC"; then
   # Extract the first word of "cc", so it can be a program name with args.
 set dummy cc; ac_word=$2
 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:567: checking for $ac_word" >&5
+echo "configure:744: checking for $ac_word" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   if test -n "$CC"; then
   ac_cv_prog_CC="$CC" # Let the user override the test.
 else
-  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
   ac_prog_rejected=no
-  for ac_dir in $PATH; do
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
     test -z "$ac_dir" && ac_dir=.
     if test -f $ac_dir/$ac_word; then
       if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then
@@ -607,25 +785,61 @@ else
   echo "$ac_t""no" 1>&6
 fi
 
+  if test -z "$CC"; then
+    case "`uname -s`" in
+    *win32* | *WIN32*)
+      # Extract the first word of "cl", so it can be a program name with args.
+set dummy cl; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:795: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_CC="cl"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+fi
+fi
+CC="$ac_cv_prog_CC"
+if test -n "$CC"; then
+  echo "$ac_t""$CC" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+ ;;
+    esac
+  fi
   test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; }
 fi
 
 echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6
-echo "configure:615: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5
+echo "configure:827: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5
 
 ac_ext=c
 # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
-ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
 cross_compiling=$ac_cv_prog_cc_cross
 
-cat > conftest.$ac_ext <<EOF
-#line 625 "configure"
+cat > conftest.$ac_ext << EOF
+
+#line 838 "configure"
 #include "confdefs.h"
+
 main(){return(0);}
 EOF
-if { (eval echo configure:629: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
+if { (eval echo configure:843: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
   ac_cv_prog_cc_works=yes
   # If we can't run a trivial program, we are probably using a cross compiler.
   if (./conftest; exit) 2>/dev/null; then
@@ -639,18 +853,24 @@ else
   ac_cv_prog_cc_works=no
 fi
 rm -fr conftest*
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
 
 echo "$ac_t""$ac_cv_prog_cc_works" 1>&6
 if test $ac_cv_prog_cc_works = no; then
   { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; }
 fi
 echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6
-echo "configure:649: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5
+echo "configure:869: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5
 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6
 cross_compiling=$ac_cv_prog_cc_cross
 
 echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6
-echo "configure:654: checking whether we are using GNU C" >&5
+echo "configure:874: checking whether we are using GNU C" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
@@ -659,7 +879,7 @@ else
   yes;
 #endif
 EOF
-if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:663: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
+if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:883: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
   ac_cv_prog_gcc=yes
 else
   ac_cv_prog_gcc=no
@@ -670,14 +890,47 @@ echo "$ac_t""$ac_cv_prog_gcc" 1>&6
 
 if test $ac_cv_prog_gcc = yes; then
   GCC=yes
-  test "${CFLAGS+set}" = set || CFLAGS="-O2"
 else
   GCC=
-  test "${CFLAGS+set}" = set || CFLAGS="-O"
+fi
+
+ac_test_CFLAGS="${CFLAGS+set}"
+ac_save_CFLAGS="$CFLAGS"
+CFLAGS=
+echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6
+echo "configure:902: checking whether ${CC-cc} accepts -g" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  echo 'void f(){}' > conftest.c
+if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then
+  ac_cv_prog_cc_g=yes
+else
+  ac_cv_prog_cc_g=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_prog_cc_g" 1>&6
+if test "$ac_test_CFLAGS" = set; then
+  CFLAGS="$ac_save_CFLAGS"
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
 fi
 
 echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6
-echo "configure:681: checking how to run the C preprocessor" >&5
+echo "configure:934: checking how to run the C preprocessor" >&5
 # On Suns, sometimes $CPP names a directory.
 if test -n "$CPP" && test -d "$CPP"; then
   CPP=
@@ -692,14 +945,14 @@ else
   # On the NeXT, cc -E runs the code through the compiler's parser,
   # not just through cpp.
   cat > conftest.$ac_ext <<EOF
-#line 696 "configure"
+#line 949 "configure"
 #include "confdefs.h"
 #include <assert.h>
 Syntax Error
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:702: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
+{ (eval echo configure:955: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   :
 else
@@ -709,14 +962,31 @@ else
   rm -rf conftest*
   CPP="${CC-cc} -E -traditional-cpp"
   cat > conftest.$ac_ext <<EOF
-#line 713 "configure"
+#line 966 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:972: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+  :
+else
+  echo "$ac_err" >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  CPP="${CC-cc} -nologo -E"
+  cat > conftest.$ac_ext <<EOF
+#line 983 "configure"
 #include "confdefs.h"
 #include <assert.h>
 Syntax Error
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:719: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
+{ (eval echo configure:989: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   :
 else
@@ -728,6 +998,8 @@ else
 fi
 rm -f conftest*
 fi
+rm -f conftest*
+fi
 rm -f conftest*
   ac_cv_prog_CPP="$CPP"
 fi
@@ -738,12 +1010,12 @@ fi
 echo "$ac_t""$CPP" 1>&6
 
 echo $ac_n "checking for function prototypes""... $ac_c" 1>&6
-echo "configure:742: checking for function prototypes" >&5
+echo "configure:1014: checking for function prototypes" >&5
 if eval "test \"`echo '$''{'ijg_cv_have_prototypes'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 747 "configure"
+#line 1019 "configure"
 #include "confdefs.h"
 
 int testfunction (int arg1, int * arg2); /* check prototypes */
@@ -761,7 +1033,7 @@ int main() {
  
 ; return 0; }
 EOF
-if { (eval echo configure:765: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1037: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   ijg_cv_have_prototypes=yes
 else
@@ -788,18 +1060,18 @@ else
 fi
 ac_safe=`echo "stddef.h" | sed 'y%./+-%__p_%'`
 echo $ac_n "checking for stddef.h""... $ac_c" 1>&6
-echo "configure:792: checking for stddef.h" >&5
+echo "configure:1064: checking for stddef.h" >&5
 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 797 "configure"
+#line 1069 "configure"
 #include "confdefs.h"
 #include <stddef.h>
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:802: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
+{ (eval echo configure:1074: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   rm -rf conftest*
   eval "ac_cv_header_$ac_safe=yes"
@@ -824,18 +1096,18 @@ fi
 
 ac_safe=`echo "stdlib.h" | sed 'y%./+-%__p_%'`
 echo $ac_n "checking for stdlib.h""... $ac_c" 1>&6
-echo "configure:828: checking for stdlib.h" >&5
+echo "configure:1100: checking for stdlib.h" >&5
 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 833 "configure"
+#line 1105 "configure"
 #include "confdefs.h"
 #include <stdlib.h>
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:838: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
+{ (eval echo configure:1110: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   rm -rf conftest*
   eval "ac_cv_header_$ac_safe=yes"
@@ -860,18 +1132,18 @@ fi
 
 ac_safe=`echo "string.h" | sed 'y%./+-%__p_%'`
 echo $ac_n "checking for string.h""... $ac_c" 1>&6
-echo "configure:864: checking for string.h" >&5
+echo "configure:1136: checking for string.h" >&5
 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 869 "configure"
+#line 1141 "configure"
 #include "confdefs.h"
 #include <string.h>
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:874: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
+{ (eval echo configure:1146: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   rm -rf conftest*
   eval "ac_cv_header_$ac_safe=yes"
@@ -896,9 +1168,9 @@ EOF
 fi
 
 echo $ac_n "checking for size_t""... $ac_c" 1>&6
-echo "configure:900: checking for size_t" >&5
+echo "configure:1172: checking for size_t" >&5
 cat > conftest.$ac_ext <<EOF
-#line 902 "configure"
+#line 1174 "configure"
 #include "confdefs.h"
 
 #ifdef HAVE_STDDEF_H
@@ -919,7 +1191,7 @@ int main() {
  my_size_t foovar; 
 ; return 0; }
 EOF
-if { (eval echo configure:923: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1195: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   ijg_size_t_ok=yes
 else
@@ -933,18 +1205,18 @@ echo "$ac_t""$ijg_size_t_ok" 1>&6
 if test "$ijg_size_t_ok" != yes; then
 ac_safe=`echo "sys/types.h" | sed 'y%./+-%__p_%'`
 echo $ac_n "checking for sys/types.h""... $ac_c" 1>&6
-echo "configure:937: checking for sys/types.h" >&5
+echo "configure:1209: checking for sys/types.h" >&5
 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 942 "configure"
+#line 1214 "configure"
 #include "confdefs.h"
 #include <sys/types.h>
 EOF
 ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:947: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
+{ (eval echo configure:1219: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
 if test -z "$ac_err"; then
   rm -rf conftest*
   eval "ac_cv_header_$ac_safe=yes"
@@ -964,7 +1236,7 @@ if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
 EOF
 
 cat > conftest.$ac_ext <<EOF
-#line 968 "configure"
+#line 1240 "configure"
 #include "confdefs.h"
 #include <sys/types.h>
 EOF
@@ -990,16 +1262,16 @@ if test "$ijg_size_t_ok" = no; then
 fi
 fi
 echo $ac_n "checking for type unsigned char""... $ac_c" 1>&6
-echo "configure:994: checking for type unsigned char" >&5
+echo "configure:1266: checking for type unsigned char" >&5
 cat > conftest.$ac_ext <<EOF
-#line 996 "configure"
+#line 1268 "configure"
 #include "confdefs.h"
 
 int main() {
  unsigned char un_char; 
 ; return 0; }
 EOF
-if { (eval echo configure:1003: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1275: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   echo "$ac_t""yes" 1>&6
 cat >> confdefs.h <<\EOF
@@ -1014,16 +1286,16 @@ else
 fi
 rm -f conftest*
 echo $ac_n "checking for type unsigned short""... $ac_c" 1>&6
-echo "configure:1018: checking for type unsigned short" >&5
+echo "configure:1290: checking for type unsigned short" >&5
 cat > conftest.$ac_ext <<EOF
-#line 1020 "configure"
+#line 1292 "configure"
 #include "confdefs.h"
 
 int main() {
  unsigned short un_short; 
 ; return 0; }
 EOF
-if { (eval echo configure:1027: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1299: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   echo "$ac_t""yes" 1>&6
 cat >> confdefs.h <<\EOF
@@ -1038,9 +1310,9 @@ else
 fi
 rm -f conftest*
 echo $ac_n "checking for type void""... $ac_c" 1>&6
-echo "configure:1042: checking for type void" >&5
+echo "configure:1314: checking for type void" >&5
 cat > conftest.$ac_ext <<EOF
-#line 1044 "configure"
+#line 1316 "configure"
 #include "confdefs.h"
 
 /* Caution: a C++ compiler will insist on valid prototypes */
@@ -1068,7 +1340,7 @@ int main() {
  
 ; return 0; }
 EOF
-if { (eval echo configure:1072: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1344: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   echo "$ac_t""yes" 1>&6
 else
@@ -1084,12 +1356,12 @@ fi
 rm -f conftest*
 
 echo $ac_n "checking for working const""... $ac_c" 1>&6
-echo "configure:1088: checking for working const" >&5
+echo "configure:1360: checking for working const" >&5
 if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 1093 "configure"
+#line 1365 "configure"
 #include "confdefs.h"
 
 int main() {
@@ -1138,7 +1410,7 @@ ccp = (char const *const *) p;
 
 ; return 0; }
 EOF
-if { (eval echo configure:1142: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1414: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   ac_cv_c_const=yes
 else
@@ -1159,10 +1431,10 @@ EOF
 fi
 
 echo $ac_n "checking for inline""... $ac_c" 1>&6
-echo "configure:1163: checking for inline" >&5
+echo "configure:1435: checking for inline" >&5
 ijg_cv_inline=""
 cat > conftest.$ac_ext <<EOF
-#line 1166 "configure"
+#line 1438 "configure"
 #include "confdefs.h"
 
 int main() {
@@ -1170,7 +1442,7 @@ int main() {
 int bar() { return foo();
 ; return 0; }
 EOF
-if { (eval echo configure:1174: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1446: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   ijg_cv_inline="__inline__"
 else
@@ -1178,7 +1450,7 @@ else
   cat conftest.$ac_ext >&5
   rm -rf conftest*
   cat > conftest.$ac_ext <<EOF
-#line 1182 "configure"
+#line 1454 "configure"
 #include "confdefs.h"
 
 int main() {
@@ -1186,7 +1458,7 @@ int main() {
 int bar() { return foo();
 ; return 0; }
 EOF
-if { (eval echo configure:1190: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1462: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   ijg_cv_inline="__inline"
 else
@@ -1194,7 +1466,7 @@ else
   cat conftest.$ac_ext >&5
   rm -rf conftest*
   cat > conftest.$ac_ext <<EOF
-#line 1198 "configure"
+#line 1470 "configure"
 #include "confdefs.h"
 
 int main() {
@@ -1202,7 +1474,7 @@ int main() {
 int bar() { return foo();
 ; return 0; }
 EOF
-if { (eval echo configure:1206: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1478: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   ijg_cv_inline="inline"
 else
@@ -1220,16 +1492,16 @@ cat >> confdefs.h <<EOF
 EOF
 
 echo $ac_n "checking for broken incomplete types""... $ac_c" 1>&6
-echo "configure:1224: checking for broken incomplete types" >&5
+echo "configure:1496: checking for broken incomplete types" >&5
 cat > conftest.$ac_ext <<EOF
-#line 1226 "configure"
+#line 1498 "configure"
 #include "confdefs.h"
  typedef struct undefined_structure * undef_struct_ptr; 
 int main() {
 
 ; return 0; }
 EOF
-if { (eval echo configure:1233: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:1505: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
   echo "$ac_t""ok" 1>&6
 else
@@ -1244,9 +1516,9 @@ EOF
 fi
 rm -f conftest*
 echo $ac_n "checking for short external names""... $ac_c" 1>&6
-echo "configure:1248: checking for short external names" >&5
+echo "configure:1520: checking for short external names" >&5
 cat > conftest.$ac_ext <<EOF
-#line 1250 "configure"
+#line 1522 "configure"
 #include "confdefs.h"
 
 int possibly_duplicate_function () { return 0; }
@@ -1256,7 +1528,7 @@ int main() {
  
 ; return 0; }
 EOF
-if { (eval echo configure:1260: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
+if { (eval echo configure:1532: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
   rm -rf conftest*
   echo "$ac_t""ok" 1>&6
 else
@@ -1271,14 +1543,14 @@ EOF
 fi
 rm -f conftest*
 echo $ac_n "checking to see if char is signed""... $ac_c" 1>&6
-echo "configure:1275: checking to see if char is signed" >&5
+echo "configure:1547: checking to see if char is signed" >&5
 if test "$cross_compiling" = yes; then
   echo Assuming that char is signed on target machine.
 echo If it is unsigned, this will be a little bit inefficient.
 
 else
   cat > conftest.$ac_ext <<EOF
-#line 1282 "configure"
+#line 1554 "configure"
 #include "confdefs.h"
 
 #ifdef HAVE_PROTOTYPES
@@ -1302,7 +1574,7 @@ main() {
   exit(is_char_signed((int) signed_char_check));
 }
 EOF
-if { (eval echo configure:1306: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
+if { (eval echo configure:1578: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
 then
   echo "$ac_t""no" 1>&6
 cat >> confdefs.h <<\EOF
@@ -1319,12 +1591,12 @@ rm -fr conftest*
 fi
 
 echo $ac_n "checking to see if right shift is signed""... $ac_c" 1>&6
-echo "configure:1323: checking to see if right shift is signed" >&5
+echo "configure:1595: checking to see if right shift is signed" >&5
 if test "$cross_compiling" = yes; then
   echo "$ac_t""Assuming that right shift is signed on target machine." 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 1328 "configure"
+#line 1600 "configure"
 #include "confdefs.h"
 
 #ifdef HAVE_PROTOTYPES
@@ -1354,7 +1626,7 @@ main() {
   exit(is_shifting_signed(-0x7F7E80B1L));
 }
 EOF
-if { (eval echo configure:1358: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
+if { (eval echo configure:1630: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
 then
   echo "$ac_t""no" 1>&6
 cat >> confdefs.h <<\EOF
@@ -1371,12 +1643,12 @@ rm -fr conftest*
 fi
 
 echo $ac_n "checking to see if fopen accepts b spec""... $ac_c" 1>&6
-echo "configure:1375: checking to see if fopen accepts b spec" >&5
+echo "configure:1647: checking to see if fopen accepts b spec" >&5
 if test "$cross_compiling" = yes; then
   echo "$ac_t""Assuming that it does." 1>&6
 else
   cat > conftest.$ac_ext <<EOF
-#line 1380 "configure"
+#line 1652 "configure"
 #include "confdefs.h"
 
 #include <stdio.h>
@@ -1386,7 +1658,7 @@ main() {
   exit(1);
 }
 EOF
-if { (eval echo configure:1390: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
+if { (eval echo configure:1662: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
 then
   echo "$ac_t""yes" 1>&6
 else
@@ -1428,28 +1700,30 @@ ac_configure=$ac_aux_dir/configure # This should be Cygnus configure.
 # SunOS /usr/etc/install
 # IRIX /sbin/install
 # AIX /bin/install
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
 # AFS /usr/afsws/bin/install, which mishandles nonexistent args
 # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
 # ./install, which can be erroneously created by make from ./install.sh.
 echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6
-echo "configure:1436: checking for a BSD compatible install" >&5
+echo "configure:1709: checking for a BSD compatible install" >&5
 if test -z "$INSTALL"; then
 if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
-    IFS="${IFS=        }"; ac_save_IFS="$IFS"; IFS="${IFS}:"
+    IFS="${IFS=        }"; ac_save_IFS="$IFS"; IFS=":"
   for ac_dir in $PATH; do
     # Account for people who put trailing slashes in PATH elements.
     case "$ac_dir/" in
     /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
     *)
       # OSF1 and SCO ODT 3.0 have their own names for install.
-      for ac_prog in ginstall installbsd scoinst install; do
+      # Don't use installbsd from OSF since it installs stuff as root
+      # by default.
+      for ac_prog in ginstall scoinst install; do
         if test -f $ac_dir/$ac_prog; then
          if test $ac_prog = install &&
             grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
            # AIX install.  It has an incompatible calling convention.
-           # OSF/1 installbsd also uses dspmsg, but is usable.
            :
          else
            ac_cv_path_install="$ac_dir/$ac_prog -c"
@@ -1479,20 +1753,23 @@ echo "$ac_t""$INSTALL" 1>&6
 # It thinks the first close brace ends the variable substitution.
 test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
 
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}'
+
 test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
 
 # Extract the first word of "ranlib", so it can be a program name with args.
 set dummy ranlib; ac_word=$2
 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:1488: checking for $ac_word" >&5
+echo "configure:1764: checking for $ac_word" >&5
 if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then
   echo $ac_n "(cached) $ac_c" 1>&6
 else
   if test -n "$RANLIB"; then
   ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
 else
-  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-  for ac_dir in $PATH; do
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
     test -z "$ac_dir" && ac_dir=.
     if test -f $ac_dir/$ac_word; then
       ac_cv_prog_RANLIB="ranlib"
@@ -1511,30 +1788,186 @@ else
 fi
 
 
+
+# Make sure we can run config.sub.
+if ${CONFIG_SHELL-/bin/sh} $ac_config_sub sun4 >/dev/null 2>&1; then :
+else { echo "configure: error: can not run $ac_config_sub" 1>&2; exit 1; }
+fi
+
+echo $ac_n "checking host system type""... $ac_c" 1>&6
+echo "configure:1799: checking host system type" >&5
+
+host_alias=$host
+case "$host_alias" in
+NONE)
+  case $nonopt in
+  NONE)
+    if host_alias=`${CONFIG_SHELL-/bin/sh} $ac_config_guess`; then :
+    else { echo "configure: error: can not guess host type; you must specify one" 1>&2; exit 1; }
+    fi ;;
+  *) host_alias=$nonopt ;;
+  esac ;;
+esac
+
+host=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $host_alias`
+host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+echo "$ac_t""$host" 1>&6
+
+echo $ac_n "checking for Cygwin environment""... $ac_c" 1>&6
+echo "configure:1820: checking for Cygwin environment" >&5
+if eval "test \"`echo '$''{'ac_cv_cygwin'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  cat > conftest.$ac_ext <<EOF
+#line 1825 "configure"
+#include "confdefs.h"
+
+int main() {
+
+#ifndef __CYGWIN__
+#define __CYGWIN__ __CYGWIN32__
+#endif
+return __CYGWIN__;
+; return 0; }
+EOF
+if { (eval echo configure:1836: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+  rm -rf conftest*
+  ac_cv_cygwin=yes
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  ac_cv_cygwin=no
+fi
+rm -f conftest*
+rm -f conftest*
+fi
+
+echo "$ac_t""$ac_cv_cygwin" 1>&6
+CYGWIN=
+test "$ac_cv_cygwin" = yes && CYGWIN=yes
+echo $ac_n "checking for mingw32 environment""... $ac_c" 1>&6
+echo "configure:1853: checking for mingw32 environment" >&5
+if eval "test \"`echo '$''{'ac_cv_mingw32'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  cat > conftest.$ac_ext <<EOF
+#line 1858 "configure"
+#include "confdefs.h"
+
+int main() {
+return __MINGW32__;
+; return 0; }
+EOF
+if { (eval echo configure:1865: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+  rm -rf conftest*
+  ac_cv_mingw32=yes
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  ac_cv_mingw32=no
+fi
+rm -f conftest*
+rm -f conftest*
+fi
+
+echo "$ac_t""$ac_cv_mingw32" 1>&6
+MINGW32=
+test "$ac_cv_mingw32" = yes && MINGW32=yes
+
+
+echo $ac_n "checking for executable suffix""... $ac_c" 1>&6
+echo "configure:1884: checking for executable suffix" >&5
+if eval "test \"`echo '$''{'ac_cv_exeext'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test "$CYGWIN" = yes || test "$MINGW32" = yes; then
+  ac_cv_exeext=.exe
+else
+  rm -f conftest*
+  echo 'int main () { return 0; }' > conftest.$ac_ext
+  ac_cv_exeext=
+  if { (eval echo configure:1894: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then
+    for file in conftest.*; do
+      case $file in
+      *.c | *.o | *.obj) ;;
+      *) ac_cv_exeext=`echo $file | sed -e s/conftest//` ;;
+      esac
+    done
+  else
+    { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; }
+  fi
+  rm -f conftest*
+  test x"${ac_cv_exeext}" = x && ac_cv_exeext=no
+fi
+fi
+
+EXEEXT=""
+test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext}
+echo "$ac_t""${ac_cv_exeext}" 1>&6
+ac_exeext=$EXEEXT
+
+
 # Decide whether to use libtool,
 # and if so whether to build shared, static, or both flavors of library.
-LTSHARED="no"
 # Check whether --enable-shared or --disable-shared was given.
 if test "${enable_shared+set}" = set; then
   enableval="$enable_shared"
-  LTSHARED="$enableval"
+  p=${PACKAGE-default}
+case $enableval in
+yes) enable_shared=yes ;;
+no) enable_shared=no ;;
+*)
+  enable_shared=no
+  # Look at the argument we got.  We use all the common list separators.
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+  for pkg in $enableval; do
+    if test "X$pkg" = "X$p"; then
+      enable_shared=yes
+    fi
+  done
+  IFS="$ac_save_ifs"
+  ;;
+esac
+else
+  enable_shared=no
 fi
 
-LTSTATIC="no"
 # Check whether --enable-static or --disable-static was given.
 if test "${enable_static+set}" = set; then
   enableval="$enable_static"
-  LTSTATIC="$enableval"
+  p=${PACKAGE-default}
+case $enableval in
+yes) enable_static=yes ;;
+no) enable_static=no ;;
+*)
+  enable_static=no
+  # Look at the argument we got.  We use all the common list separators.
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+  for pkg in $enableval; do
+    if test "X$pkg" = "X$p"; then
+      enable_static=yes
+    fi
+  done
+  IFS="$ac_save_ifs"
+  ;;
+esac
+else
+  enable_static=no
 fi
 
-if test "x$LTSHARED" != xno  -o  "x$LTSTATIC" != xno; then
+if test "x$enable_shared" != xno  -o  "x$enable_static" != xno; then
   USELIBTOOL="yes"
-  LIBTOOL="./libtool"
+# LIBTOOL="./libtool"
   O="lo"
   A="la"
   LN='$(LIBTOOL) --mode=link $(CC)'
   INSTALL_LIB='$(LIBTOOL) --mode=install ${INSTALL}'
   INSTALL_PROGRAM="\$(LIBTOOL) --mode=install $INSTALL_PROGRAM"
+  UNINSTALL='$(LIBTOOL) --mode=uninstall $(RM)'
 else
   USELIBTOOL="no"
   LIBTOOL=""
@@ -1542,6 +1975,7 @@ else
   A="a"
   LN='$(CC)'
   INSTALL_LIB="$INSTALL_DATA"
+  UNINSTALL='$(RM)'
 fi
 
 
 
 
 
+
 # Configure libtool if needed.
 if test $USELIBTOOL = yes; then
-  disable_shared=
-  disable_static=
-  if test "x$LTSHARED" = xno; then
-    disable_shared="--disable-shared"
-  fi
-  if test "x$LTSTATIC" = xno; then
-    disable_static="--disable-static"
-  fi
-  $srcdir/ltconfig $disable_shared $disable_static $srcdir/ltmain.sh
+  
+  
+  # Find the correct PATH separator.  Usually this is `:', but
+# DJGPP uses `;' like DOS.
+if test "X${PATH_SEPARATOR+set}" != Xset; then
+  UNAME=${UNAME-`uname 2>/dev/null`}
+  case X$UNAME in
+    *-DOS) lt_cv_sys_path_separator=';' ;;
+    *)     lt_cv_sys_path_separator=':' ;;
+  esac
+  PATH_SEPARATOR=$lt_cv_sys_path_separator
 fi
 
-# Select memory manager depending on user input.
-# If no "-enable-maxmem", use jmemnobs
-MEMORYMGR='jmemnobs.$(O)'
-MAXMEM="no"
-# Check whether --enable-maxmem or --disable-maxmem was given.
-if test "${enable_maxmem+set}" = set; then
-  enableval="$enable_maxmem"
-  MAXMEM="$enableval"
+# Check whether --enable-fast-install or --disable-fast-install was given.
+if test "${enable_fast_install+set}" = set; then
+  enableval="$enable_fast_install"
+  p=${PACKAGE-default}
+case $enableval in
+yes) enable_fast_install=yes ;;
+no) enable_fast_install=no ;;
+*)
+  enable_fast_install=no
+  # Look at the argument we got.  We use all the common list separators.
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+  for pkg in $enableval; do
+    if test "X$pkg" = "X$p"; then
+      enable_fast_install=yes
+    fi
+  done
+  IFS="$ac_save_ifs"
+  ;;
+esac
+else
+  enable_fast_install=yes
 fi
 
-# support --with-maxmem for backwards compatibility with IJG V5.
-# Check whether --with-maxmem or --without-maxmem was given.
-if test "${with_maxmem+set}" = set; then
-  withval="$with_maxmem"
-  MAXMEM="$withval"
+echo $ac_n "checking build system type""... $ac_c" 1>&6
+echo "configure:2027: checking build system type" >&5
+
+build_alias=$build
+case "$build_alias" in
+NONE)
+  case $nonopt in
+  NONE) build_alias=$host_alias ;;
+  *) build_alias=$nonopt ;;
+  esac ;;
+esac
+
+build=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $build_alias`
+build_cpu=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+build_vendor=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+build_os=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+echo "$ac_t""$build" 1>&6
+
+# Check whether --with-gnu-ld or --without-gnu-ld was given.
+if test "${with_gnu_ld+set}" = set; then
+  withval="$with_gnu_ld"
+  test "$withval" = no || with_gnu_ld=yes
+else
+  with_gnu_ld=no
 fi
 
-if test "x$MAXMEM" = xyes; then
-  MAXMEM=1
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  echo $ac_n "checking for ld used by GCC""... $ac_c" 1>&6
+echo "configure:2056: checking for ld used by GCC" >&5
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [\\/]* | [A-Za-z]:[\\/]*)
+      re_direlt='/[^/][^/]*/\.\./'
+      # Canonicalize the path of ld
+      ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'`
+      while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+       ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  echo $ac_n "checking for GNU ld""... $ac_c" 1>&6
+echo "configure:2086: checking for GNU ld" >&5
+else
+  echo $ac_n "checking for non-GNU ld""... $ac_c" 1>&6
+echo "configure:2089: checking for non-GNU ld" >&5
+fi
+if eval "test \"`echo '$''{'lt_cv_path_LD'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -z "$LD"; then
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some GNU ld's only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      if "$lt_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
+       test "$with_gnu_ld" != no && break
+      else
+       test "$with_gnu_ld" != yes && break
+      fi
+    fi
+  done
+  IFS="$ac_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
 fi
-if test "x$MAXMEM" != xno; then
-  if test -n "`echo $MAXMEM | sed 's/[0-9]//g'`"; then
-    { echo "configure: error: non-numeric argument to --enable-maxmem" 1>&2; exit 1; }
-  fi
-  DEFAULTMAXMEM=`expr $MAXMEM \* 1048576`
-cat >> confdefs.h <<EOF
-#define DEFAULT_MAX_MEM ${DEFAULTMAXMEM}
-EOF
 
-echo $ac_n "checking for 'tmpfile()'""... $ac_c" 1>&6
-echo "configure:1596: checking for 'tmpfile()'" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1598 "configure"
-#include "confdefs.h"
-#include <stdio.h>
-int main() {
- FILE * tfile = tmpfile(); 
-; return 0; }
-EOF
-if { (eval echo configure:1605: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-  rm -rf conftest*
-  echo "$ac_t""yes" 1>&6
-MEMORYMGR='jmemansi.$(O)'
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  echo "$ac_t""$LD" 1>&6
 else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
   echo "$ac_t""no" 1>&6
-MEMORYMGR='jmemname.$(O)'
-cat >> confdefs.h <<\EOF
-#define NEED_SIGNAL_CATCHER 
-EOF
+fi
+test -z "$LD" && { echo "configure: error: no acceptable ld found in \$PATH" 1>&2; exit 1; }
+echo $ac_n "checking if the linker ($LD) is GNU ld""... $ac_c" 1>&6
+echo "configure:2124: checking if the linker ($LD) is GNU ld" >&5
+if eval "test \"`echo '$''{'lt_cv_prog_gnu_ld'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  # I'd rather use --version here, but apparently some GNU ld's only accept -v.
+if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
+  lt_cv_prog_gnu_ld=yes
+else
+  lt_cv_prog_gnu_ld=no
+fi
+fi
 
-echo $ac_n "checking for 'mktemp()'""... $ac_c" 1>&6
-echo "configure:1620: checking for 'mktemp()'" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1622 "configure"
+echo "$ac_t""$lt_cv_prog_gnu_ld" 1>&6
+with_gnu_ld=$lt_cv_prog_gnu_ld
+
+
+echo $ac_n "checking for $LD option to reload object files""... $ac_c" 1>&6
+echo "configure:2141: checking for $LD option to reload object files" >&5
+if eval "test \"`echo '$''{'lt_cv_ld_reload_flag'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  lt_cv_ld_reload_flag='-r'
+fi
+
+echo "$ac_t""$lt_cv_ld_reload_flag" 1>&6
+reload_flag=$lt_cv_ld_reload_flag
+test -n "$reload_flag" && reload_flag=" $reload_flag"
+
+echo $ac_n "checking for BSD-compatible nm""... $ac_c" 1>&6
+echo "configure:2153: checking for BSD-compatible nm" >&5
+if eval "test \"`echo '$''{'lt_cv_path_NM'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do
+    test -z "$ac_dir" && ac_dir=.
+    tmp_nm=$ac_dir/${ac_tool_prefix}nm
+    if test -f $tmp_nm || test -f $tmp_nm$ac_exeext ; then
+      # Check to see if the nm accepts a BSD-compat flag.
+      # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+      #   nm: unknown option "B" ignored
+      # Tru64's nm complains that /dev/null is an invalid object file
+      if ($tmp_nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep '(/dev/null|Invalid file or object type)' >/dev/null; then
+       lt_cv_path_NM="$tmp_nm -B"
+       break
+      elif ($tmp_nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+       lt_cv_path_NM="$tmp_nm -p"
+       break
+      else
+       lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+       continue # so that we can try to find one that supports BSD flags
+      fi
+    fi
+  done
+  IFS="$ac_save_ifs"
+  test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm
+fi
+fi
+
+NM="$lt_cv_path_NM"
+echo "$ac_t""$NM" 1>&6
+
+echo $ac_n "checking for a sed that does not truncate output""... $ac_c" 1>&6
+echo "configure:2191: checking for a sed that does not truncate output" >&5
+if eval "test \"`echo '$''{'lt_cv_path_SED'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  # Loop through the user's path and test for sed and gsed.
+# Then use that list of sed's as ones to test for truncation.
+as_executable_p="test -f"
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_prog in sed gsed; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+        _sed_list="$_sed_list $as_dir/$ac_prog$ac_exec_ext"
+      fi
+    done
+  done
+done
+
+  # Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+  trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+  trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+
+# Create a (secure) tmp directory for tmp files.
+: ${TMPDIR=/tmp}
+{
+  tmp=`(umask 077 && mktemp -d -q "$TMPDIR/sedXXXXXX") 2>/dev/null` &&
+  test -n "$tmp" && test -d "$tmp"
+}  ||
+{
+  tmp=$TMPDIR/sed$$-$RANDOM
+  (umask 077 && mkdir $tmp)
+} ||
+{
+   echo "$me: cannot create a temporary directory in $TMPDIR" >&2
+   { (exit 1); exit 1; }
+}
+  _max=0
+  _count=0
+  # Add /usr/xpg4/bin/sed as it is typically found on Solaris
+  # along with /bin/sed that truncates output.
+  for _sed in $_sed_list /usr/xpg4/bin/sed; do
+    test ! -f ${_sed} && break
+    cat /dev/null > "$tmp/sed.in"
+    _count=0
+    echo ${ECHO_N-$ac_n} "0123456789${ECHO_C-$ac_c}" >"$tmp/sed.in"
+    # Check for GNU sed and select it if it is found.
+    if "${_sed}" --version 2>&1 < /dev/null | egrep '(GNU)' > /dev/null; then
+      lt_cv_path_SED=${_sed}
+      break
+    fi
+    while true; do
+      cat "$tmp/sed.in" "$tmp/sed.in" >"$tmp/sed.tmp"
+      mv "$tmp/sed.tmp" "$tmp/sed.in"
+      cp "$tmp/sed.in" "$tmp/sed.nl"
+      echo >>"$tmp/sed.nl"
+      ${_sed} -e 's/a$//' < "$tmp/sed.nl" >"$tmp/sed.out" || break
+      cmp -s "$tmp/sed.out" "$tmp/sed.nl" || break
+      # 40000 chars as input seems more than enough
+      test $_count -gt 10 && break
+      _count=`expr $_count + 1`
+      if test $_count -gt $_max; then
+        _max=$_count
+        lt_cv_path_SED=$_sed
+      fi
+    done
+  done
+  rm -rf "$tmp"
+
+fi
+
+if test "X$SED" != "X"; then
+  lt_cv_path_SED=$SED
+else
+  SED=$lt_cv_path_SED
+fi
+echo "$ac_t""$SED" 1>&6
+
+echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6
+echo "configure:2275: checking whether ln -s works" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  rm -f conftestdata
+if ln -s X conftestdata 2>/dev/null
+then
+  rm -f conftestdata
+  ac_cv_prog_LN_S="ln -s"
+else
+  ac_cv_prog_LN_S=ln
+fi
+fi
+LN_S="$ac_cv_prog_LN_S"
+if test "$ac_cv_prog_LN_S" = "ln -s"; then
+  echo "$ac_t""yes" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+echo $ac_n "checking how to recognise dependent libraries""... $ac_c" 1>&6
+echo "configure:2296: checking how to recognise dependent libraries" >&5
+if eval "test \"`echo '$''{'lt_cv_deplibs_check_method'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given egrep regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix4* | aix5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi4*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin* | mingw* | pw32*)
+  lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method='file_magic Mach-O dynamically linked shared library'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  case "$host_os" in
+  rhapsody* | darwin1.[012])
+    lt_cv_file_magic_test_file=`echo /System/Library/Frameworks/System.framework/Versions/*/System | head -1`
+    ;;
+  *) # Darwin 1.3 on
+    lt_cv_file_magic_test_file='/usr/lib/libSystem.dylib'
+    ;;
+  esac
+  ;;
+
+freebsd*)
+  if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
+      lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[3-9]86 (compact )?demand paged shared library'
+      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+      ;;
+    esac
+  else
+    lt_cv_deplibs_check_method=pass_all
+  fi
+  ;;
+
+gnu*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+hpux10.20*|hpux11*)
+  lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9].[0-9]) shared library'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libc.sl
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+  irix5* | nonstopux*)
+    # this will be overridden with pass_all, but let us keep it just in case
+    lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1"
+    ;;
+  *)
+    case $LD in
+    *-32|*"-32 ") libmagic=32-bit;;
+    *-n32|*"-n32 ") libmagic=N32;;
+    *-64|*"-64 ") libmagic=64-bit;;
+    *) libmagic=never-match;;
+    esac
+    # this will be overridden with pass_all, but let us keep it just in case
+    lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"
+    ;;
+  esac
+  lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*`
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+  case $host_cpu in
+  alpha* | hppa* | i*86 | mips | mipsel | powerpc* | sparc* | ia64* | s390* | x86_64*)
+    lt_cv_deplibs_check_method=pass_all ;;
+  *)
+    # glibc up to 2.1.1 does not perform some relocations on ARM
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;
+  esac
+  lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so`
+  ;;
+
+netbsd*)
+  if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+    lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so\.[0-9]+\.[0-9]+$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so$'
+  fi
+  ;;
+
+newos6*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libnls.so
+  ;;
+
+openbsd*)
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB shared object'
+  else
+    lt_cv_deplibs_check_method='file_magic OpenBSD.* shared library'
+  fi
+  ;;
+
+osf3* | osf4* | osf5*)
+  # this will be overridden with pass_all, but let us keep it just in case
+  lt_cv_deplibs_check_method='file_magic COFF format alpha shared library'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sco3.2v5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+solaris*)
+  lt_cv_deplibs_check_method=pass_all
+  lt_cv_file_magic_test_file=/lib/libc.so
+  ;;
+
+sysv5uw[78]* | sysv4*uw2*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+  case $host_vendor in
+  motorola)
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+    ;;
+  ncr)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  sequent)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )'
+    ;;
+  sni)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib"
+    lt_cv_file_magic_test_file=/lib/libc.so
+    ;;
+  siemens)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  esac
+  ;;
+esac
+
+fi
+
+echo "$ac_t""$lt_cv_deplibs_check_method" 1>&6
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+
+echo $ac_n "checking for object suffix""... $ac_c" 1>&6
+echo "configure:2482: checking for object suffix" >&5
+if eval "test \"`echo '$''{'ac_cv_objext'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  rm -f conftest*
+echo 'int i = 1;' > conftest.$ac_ext
+if { (eval echo configure:2488: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+  for ac_file in conftest.*; do
+    case $ac_file in
+    *.c) ;;
+    *) ac_cv_objext=`echo $ac_file | sed -e s/conftest.//` ;;
+    esac
+  done
+else
+  { echo "configure: error: installation or configuration problem; compiler does not work" 1>&2; exit 1; }
+fi
+rm -f conftest*
+fi
+
+echo "$ac_t""$ac_cv_objext" 1>&6
+OBJEXT=$ac_cv_objext
+ac_objext=$ac_cv_objext
+
+if test $host != $build; then
+  ac_tool_prefix=${host_alias}-
+else
+  ac_tool_prefix=
+fi
+
+
+
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+echo $ac_n "checking command to parse $NM output""... $ac_c" 1>&6
+echo "configure:2516: checking command to parse $NM output" >&5
+if eval "test \"`echo '$''{'lt_cv_sys_global_symbol_pipe'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[BCDEGRST]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
+
+# Transform the above into a raw symbol and a C symbol.
+symxfrm='\1 \2\3 \3'
+
+# Transform an extracted symbol line into a proper C declaration
+lt_cv_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/  {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([^ ]*\) \([^ ]*\)$/  {\"\2\", (lt_ptr) \&\2},/p'"
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+  symcode='[BCDT]'
+  ;;
+cygwin* | mingw* | pw32*)
+  symcode='[ABCDGISTW]'
+  ;;
+hpux*) # Its linker distinguishes data from code symbols
+  lt_cv_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+  lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/  {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  {\"\2\", (lt_ptr) \&\2},/p'"
+  ;;
+irix* | nonstopux*)
+  symcode='[BCDEGRST]'
+  ;;
+osf*)
+  symcode='[BCDEGQRST]'
+  ;;
+solaris* | sysv5*)
+  symcode='[BDT]'
+  ;;
+sysv4)
+  symcode='[DFNSTU]'
+  ;;
+esac
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $host_os in
+mingw*)
+  opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then
+  symcode='[ABCDGISTW]'
+fi
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Write the raw and C identifiers.
+lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[        ]\($symcode$symcode*\)[         ][      ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"
+
+  # Check to see that the pipe works correctly.
+  pipe_works=no
+  rm -f conftest*
+  cat > conftest.$ac_ext <<EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+EOF
+
+  if { (eval echo configure:2599: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if { (eval echo configure:2602: \"$NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist\") 1>&5; (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) 2>&5; } && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      if sort "$nlist" | uniq > "$nlist"T; then
+       mv -f "$nlist"T "$nlist"
+      else
+       rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if egrep ' nm_test_var$' "$nlist" >/dev/null; then
+       if egrep ' nm_test_func$' "$nlist" >/dev/null; then
+         cat <<EOF > conftest.$ac_ext
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+         # Now generate the symbol file.
+         eval "$lt_cv_global_symbol_to_cdecl"' < "$nlist" >> conftest.$ac_ext'
+
+         cat <<EOF >> conftest.$ac_ext
+#if defined (__STDC__) && __STDC__
+# define lt_ptr void *
+#else
+# define lt_ptr char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+  const char *name;
+  lt_ptr address;
+}
+lt_preloaded_symbols[] =
+{
+EOF
+         sed "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (lt_ptr) \&\2},/" < "$nlist" >> conftest.$ac_ext
+         cat <<\EOF >> conftest.$ac_ext
+  {0, (lt_ptr) 0}
+};
+
+#ifdef __cplusplus
+}
+#endif
+EOF
+         # Now try linking the two files.
+         mv conftest.$ac_objext conftstm.$ac_objext
+         save_LIBS="$LIBS"
+         save_CFLAGS="$CFLAGS"
+         LIBS="conftstm.$ac_objext"
+         CFLAGS="$CFLAGS$no_builtin_flag"
+         if { (eval echo configure:2653: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest$ac_exeext; then
+           pipe_works=yes
+         fi
+         LIBS="$save_LIBS"
+         CFLAGS="$save_CFLAGS"
+       else
+         echo "cannot find nm_test_func in $nlist" >&5
+       fi
+      else
+       echo "cannot find nm_test_var in $nlist" >&5
+      fi
+    else
+      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5
+    fi
+  else
+    echo "$progname: failed program was:" >&5
+    cat conftest.$ac_ext >&5
+  fi
+  rm -f conftest* conftst*
+
+  # Do not use the global_symbol_pipe unless it works.
+  if test "$pipe_works" = yes; then
+    break
+  else
+    lt_cv_sys_global_symbol_pipe=
+  fi
+done
+
+fi
+
+global_symbol_pipe="$lt_cv_sys_global_symbol_pipe"
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+  global_symbol_to_cdecl=
+  global_symbol_to_c_name_address=
+else
+  global_symbol_to_cdecl="$lt_cv_global_symbol_to_cdecl"
+  global_symbol_to_c_name_address="$lt_cv_global_symbol_to_c_name_address"
+fi
+if test -z "$global_symbol_pipe$global_symbol_to_cdec$global_symbol_to_c_name_address";
+then
+  echo "$ac_t""failed" 1>&6
+else
+  echo "$ac_t""ok" 1>&6
+fi
+
+for ac_hdr in dlfcn.h
+do
+ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'`
+echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6
+echo "configure:2702: checking for $ac_hdr" >&5
+if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  cat > conftest.$ac_ext <<EOF
+#line 2707 "configure"
+#include "confdefs.h"
+#include <$ac_hdr>
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:2712: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+  rm -rf conftest*
+  eval "ac_cv_header_$ac_safe=yes"
+else
+  echo "$ac_err" >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  eval "ac_cv_header_$ac_safe=no"
+fi
+rm -f conftest*
+fi
+if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
+  echo "$ac_t""yes" 1>&6
+    ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'`
+  cat >> confdefs.h <<EOF
+#define $ac_tr_hdr 1
+EOF
+else
+  echo "$ac_t""no" 1>&6
+fi
+done
+
+
+
+
+
+
+# Only perform the check for file, if the check method requires it
+case $deplibs_check_method in
+file_magic*)
+  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
+    echo $ac_n "checking for ${ac_tool_prefix}file""... $ac_c" 1>&6
+echo "configure:2748: checking for ${ac_tool_prefix}file" >&5
+if eval "test \"`echo '$''{'lt_cv_path_MAGIC_CMD'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  case $MAGIC_CMD in
+  /*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+  ?:/*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a dos path.
+  ;;
+  *)
+  ac_save_MAGIC_CMD="$MAGIC_CMD"
+  IFS="${IFS=   }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="/usr/bin:$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/${ac_tool_prefix}file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file"
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           egrep "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  MAGIC_CMD="$ac_save_MAGIC_CMD"
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  echo "$ac_t""$MAGIC_CMD" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    echo $ac_n "checking for file""... $ac_c" 1>&6
+echo "configure:2810: checking for file" >&5
+if eval "test \"`echo '$''{'lt_cv_path_MAGIC_CMD'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  case $MAGIC_CMD in
+  /*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+  ?:/*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a dos path.
+  ;;
+  *)
+  ac_save_MAGIC_CMD="$MAGIC_CMD"
+  IFS="${IFS=   }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="/usr/bin:$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/file"
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           egrep "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  MAGIC_CMD="$ac_save_MAGIC_CMD"
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  echo "$ac_t""$MAGIC_CMD" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+  else
+    MAGIC_CMD=:
+  fi
+fi
+
+  fi
+  ;;
+esac
+
+# Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:2881: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+fi
+fi
+RANLIB="$ac_cv_prog_RANLIB"
+if test -n "$RANLIB"; then
+  echo "$ac_t""$RANLIB" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+
+if test -z "$ac_cv_prog_RANLIB"; then
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:2913: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_RANLIB="ranlib"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":"
+fi
+fi
+RANLIB="$ac_cv_prog_RANLIB"
+if test -n "$RANLIB"; then
+  echo "$ac_t""$RANLIB" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+else
+  RANLIB=":"
+fi
+fi
+
+# Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:2948: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_STRIP'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+fi
+fi
+STRIP="$ac_cv_prog_STRIP"
+if test -n "$STRIP"; then
+  echo "$ac_t""$STRIP" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+
+if test -z "$ac_cv_prog_STRIP"; then
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:2980: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_STRIP'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_STRIP="strip"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  test -z "$ac_cv_prog_STRIP" && ac_cv_prog_STRIP=":"
+fi
+fi
+STRIP="$ac_cv_prog_STRIP"
+if test -n "$STRIP"; then
+  echo "$ac_t""$STRIP" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+else
+  STRIP=":"
+fi
+fi
+
+
+enable_dlopen=yes
+enable_win32_dll=yes
+
+# Check whether --enable-libtool-lock or --disable-libtool-lock was given.
+if test "${enable_libtool_lock+set}" = set; then
+  enableval="$enable_libtool_lock"
+  :
+fi
+
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+*-*-irix6*)
+  # Find out which ABI we are using.
+  echo '#line 3029 "configure"' > conftest.$ac_ext
+  if { (eval echo configure:3030: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+    case `/usr/bin/file conftest.$ac_objext` in
+    *32-bit*)
+      LD="${LD-ld} -32"
+      ;;
+    *N32*)
+      LD="${LD-ld} -n32"
+      ;;
+    *64-bit*)
+      LD="${LD-ld} -64"
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -belf"
+  echo $ac_n "checking whether the C compiler needs -belf""... $ac_c" 1>&6
+echo "configure:3051: checking whether the C compiler needs -belf" >&5
+if eval "test \"`echo '$''{'lt_cv_cc_needs_belf'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  
+     ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+     cat > conftest.$ac_ext <<EOF
+#line 3064 "configure"
+#include "confdefs.h"
+
+int main() {
+
+; return 0; }
+EOF
+if { (eval echo configure:3071: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  lt_cv_cc_needs_belf=yes
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  lt_cv_cc_needs_belf=no
+fi
+rm -f conftest*
+     ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+fi
+
+echo "$ac_t""$lt_cv_cc_needs_belf" 1>&6
+  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS="$SAVE_CFLAGS"
+  fi
+  ;;
+
+*-*-cygwin* | *-*-mingw* | *-*-pw32*)
+  # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args.
+set dummy ${ac_tool_prefix}dlltool; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:3101: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_DLLTOOL'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+fi
+fi
+DLLTOOL="$ac_cv_prog_DLLTOOL"
+if test -n "$DLLTOOL"; then
+  echo "$ac_t""$DLLTOOL" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+
+if test -z "$ac_cv_prog_DLLTOOL"; then
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "dlltool", so it can be a program name with args.
+set dummy dlltool; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:3133: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_DLLTOOL'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_DLLTOOL="dlltool"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  test -z "$ac_cv_prog_DLLTOOL" && ac_cv_prog_DLLTOOL="false"
+fi
+fi
+DLLTOOL="$ac_cv_prog_DLLTOOL"
+if test -n "$DLLTOOL"; then
+  echo "$ac_t""$DLLTOOL" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+else
+  DLLTOOL="false"
+fi
+fi
+
+  # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args.
+set dummy ${ac_tool_prefix}as; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:3168: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_AS'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$AS"; then
+  ac_cv_prog_AS="$AS" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_AS="${ac_tool_prefix}as"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+fi
+fi
+AS="$ac_cv_prog_AS"
+if test -n "$AS"; then
+  echo "$ac_t""$AS" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+
+if test -z "$ac_cv_prog_AS"; then
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "as", so it can be a program name with args.
+set dummy as; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:3200: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_AS'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$AS"; then
+  ac_cv_prog_AS="$AS" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_AS="as"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  test -z "$ac_cv_prog_AS" && ac_cv_prog_AS="false"
+fi
+fi
+AS="$ac_cv_prog_AS"
+if test -n "$AS"; then
+  echo "$ac_t""$AS" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+else
+  AS="false"
+fi
+fi
+
+  # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args.
+set dummy ${ac_tool_prefix}objdump; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:3235: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_OBJDUMP'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+fi
+fi
+OBJDUMP="$ac_cv_prog_OBJDUMP"
+if test -n "$OBJDUMP"; then
+  echo "$ac_t""$OBJDUMP" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+
+if test -z "$ac_cv_prog_OBJDUMP"; then
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "objdump", so it can be a program name with args.
+set dummy objdump; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:3267: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_OBJDUMP'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_OBJDUMP="objdump"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+  test -z "$ac_cv_prog_OBJDUMP" && ac_cv_prog_OBJDUMP="false"
+fi
+fi
+OBJDUMP="$ac_cv_prog_OBJDUMP"
+if test -n "$OBJDUMP"; then
+  echo "$ac_t""$OBJDUMP" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+else
+  OBJDUMP="false"
+fi
+fi
+
+
+  # recent cygwin and mingw systems supply a stub DllMain which the user
+  # can override, but on older systems we have to supply one
+  echo $ac_n "checking if libtool should supply DllMain function""... $ac_c" 1>&6
+echo "configure:3303: checking if libtool should supply DllMain function" >&5
+if eval "test \"`echo '$''{'lt_cv_need_dllmain'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  cat > conftest.$ac_ext <<EOF
+#line 3308 "configure"
+#include "confdefs.h"
+
+int main() {
+extern int __attribute__((__stdcall__)) DllMain(void*, int, void*);
+      DllMain (0, 0, 0);
+; return 0; }
+EOF
+if { (eval echo configure:3316: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  lt_cv_need_dllmain=no
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  lt_cv_need_dllmain=yes
+fi
+rm -f conftest*
+fi
+
+echo "$ac_t""$lt_cv_need_dllmain" 1>&6
+
+  case $host/$CC in
+  *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*)
+    # Old MinGW systems require "-dll" to link a DLL, while more recent ones
+    # require "-mdll": try an empty program with -mdll added to CFLAGS and fall back to -dll if the link fails.
+    SAVE_CFLAGS="$CFLAGS"
+    CFLAGS="$CFLAGS -mdll"
+    echo $ac_n "checking how to link DLLs""... $ac_c" 1>&6
+echo "configure:3337: checking how to link DLLs" >&5
+if eval "test \"`echo '$''{'lt_cv_cc_dll_switch'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  cat > conftest.$ac_ext <<EOF
+#line 3342 "configure"
+#include "confdefs.h"
+
+int main() {
+
+; return 0; }
+EOF
+if { (eval echo configure:3349: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  lt_cv_cc_dll_switch=-mdll
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  lt_cv_cc_dll_switch=-dll
+fi
+rm -f conftest*
+fi
+
+echo "$ac_t""$lt_cv_cc_dll_switch" 1>&6
+    CFLAGS="$SAVE_CFLAGS" ;;
+  *-*-cygwin* | *-*-pw32*)
+    # Cygwin systems need to pass --dll to the linker, and must not link
+    # crt.o, which would require a WinMain@16 definition (hence -nostartfiles).
+    lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;;
+  esac
+  ;;
+  
+esac
+
+# Sed helpers for robust quoting.  Xsed strips a leading X; sed_quote_subst backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e s/^X//'
+sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'
+
+# Same as sed_quote_subst, but `$' is left alone so variable references are not quoted.
+double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Constants:
+rm="rm -f"
+
+# Global variables:
+default_ofile=libtool
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except MSVC,
+# which needs `.lib').
+libext=a
+ltmain="$ac_aux_dir/ltmain.sh"
+ofile="$default_ofile"
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+need_locks="$enable_libtool_lock"
+
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+
+# Give every tool variable that is still unset or empty a conventional default (RANLIB and STRIP default to the no-op `:').
+test -z "$AR" && AR=ar
+test -z "$AR_FLAGS" && AR_FLAGS=cru
+test -z "$AS" && AS=as
+test -z "$CC" && CC=cc
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+test -z "$LD" && LD=ld
+test -z "$LN_S" && LN_S="ln -s"
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+test -z "$NM" && NM=nm
+test -z "$OBJDUMP" && OBJDUMP=objdump
+test -z "$RANLIB" && RANLIB=:
+test -z "$STRIP" && STRIP=:
+test -z "$ac_objext" && ac_objext=o
+
+if test x"$host" != x"$build"; then
+  ac_tool_prefix=${host_alias}-
+else
+  ac_tool_prefix=
+fi
+
+# Rewrite a bare linux* host to *-*-linux-gnu*, to support old configure scripts; linux-gnu* hosts are left untouched.
+case $host_os in
+linux-gnu*) ;;
+linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
+esac
+
+case $host_os in
+aix3*)
+  # AIX sometimes has problems with the GCC collect2 program.  For some
+  # reason, exporting COLLECT_NAMES (even with an empty value) makes the
+  # problems go away, so do that unless the variable is already set.
+  if test "X${COLLECT_NAMES+set}" != Xset; then
+    COLLECT_NAMES=
+    export COLLECT_NAMES
+  fi
+  ;;
+esac
+
+# Commands to create and install old-style static archives; when RANLIB is non-empty it is chained on with `~' (with -t after install on openbsd).
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+if test -n "$RANLIB"; then
+  case $host_os in
+  openbsd*)
+    old_postinstall_cmds="\$RANLIB -t \$oldlib~$old_postinstall_cmds"
+    ;;
+  *)
+    old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+fi
+
+# Allow CC to be a program name with arguments.
+set dummy $CC
+compiler="$2"
+
+echo $ac_n "checking for objdir""... $ac_c" 1>&6
+echo "configure:3463: checking for objdir" >&5
+rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+  objdir=.libs
+else
+  # MS-DOS does not allow filenames that begin with a dot.
+  objdir=_libs
+fi
+rmdir .libs 2>/dev/null
+echo "$ac_t""$objdir" 1>&6
+
+
+# Check whether --with-pic or --without-pic was given.
+if test "${with_pic+set}" = set; then
+  withval="$with_pic"
+  pic_mode="$withval"
+else
+  pic_mode=default
+fi
+
+test -z "$pic_mode" && pic_mode=default
+
+# We assume here that the value for lt_cv_prog_cc_pic will not be cached
+# in isolation, and that seeing it set (from the cache) indicates that
+# the associated values (shlib, wl, static, no_builtin, can_build_shared) are set in the cache correctly too.
+echo $ac_n "checking for $compiler option to produce PIC""... $ac_c" 1>&6
+echo "configure:3490: checking for $compiler option to produce PIC" >&5
+if eval "test \"`echo '$''{'lt_cv_prog_cc_pic'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+   lt_cv_prog_cc_pic=
+  lt_cv_prog_cc_shlib=
+  lt_cv_prog_cc_wl=
+  lt_cv_prog_cc_static=
+  lt_cv_prog_cc_no_builtin=
+  lt_cv_prog_cc_can_build_shared=$can_build_shared
+
+  if test "$GCC" = yes; then
+    lt_cv_prog_cc_wl='-Wl,'
+    lt_cv_prog_cc_static='-static'
+
+    case $host_os in
+    aix*)
+      # Below is a dirty hack to force normal static linking with -ldl.
+      # The problem is that libdl is dynamically linked with both libc and
+      # libC (the AIX C++ library), which is not included in the list of
+      # libraries used by gcc.  This causes undefined symbols with -static.
+      # The hack allows C programs to be linked with "-static -ldl", but
+      # it is not known whether it works for C++ programs.
+      lt_cv_prog_cc_static="$lt_cv_prog_cc_static ${lt_cv_prog_cc_wl}-lC"
+      ;;
+    amigaos*)
+      # FIXME: we need at least 68020 code to build shared libraries, but
+      # adding the `-m68020' flag to GCC prevents building anything better,
+      # like `-m68040'.
+      lt_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4'
+      ;;
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes, so no flag is set.
+      ;;
+    darwin* | rhapsody*)
+      # PIC is the default on this platform.
+      # Common symbols are not allowed in MH_DYLIB files.
+      lt_cv_prog_cc_pic='-fno-common'
+      ;;
+    cygwin* | mingw* | pw32* | os2*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_cv_prog_cc_pic='-DDLL_EXPORT'
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+        lt_cv_prog_cc_pic=-Kconform_pic
+      fi
+      ;;
+    *)
+      lt_cv_prog_cc_pic='-fPIC'
+      ;;
+    esac
+  else
+    # PORTME: PIC flags for each system's native (non-GCC) compiler go here.
+    case $host_os in
+    aix3* | aix4* | aix5*)
+      lt_cv_prog_cc_wl='-Wl,'
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports the IA64 processor.
+       lt_cv_prog_cc_static='-Bstatic'
+      else
+       lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      # Is there a better lt_cv_prog_cc_static that works with the bundled CC?
+      lt_cv_prog_cc_wl='-Wl,'
+      lt_cv_prog_cc_static="${lt_cv_prog_cc_wl}-a ${lt_cv_prog_cc_wl}archive"
+      lt_cv_prog_cc_pic='+Z'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      lt_cv_prog_cc_wl='-Wl,'
+      lt_cv_prog_cc_static='-non_shared'
+      # PIC (with -KPIC) is the default, so no PIC flag is set.
+      ;;
+
+    cygwin* | mingw* | pw32* | os2*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_cv_prog_cc_pic='-DDLL_EXPORT'
+      ;;
+
+    newsos6)
+      lt_cv_prog_cc_pic='-KPIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      # All OSF/1 code is PIC.
+      lt_cv_prog_cc_wl='-Wl,'
+      lt_cv_prog_cc_static='-non_shared'
+      ;;
+
+    sco3.2v5*)
+      lt_cv_prog_cc_pic='-Kpic'
+      lt_cv_prog_cc_static='-dn'
+      lt_cv_prog_cc_shlib='-belf'
+      ;;
+
+    solaris*)
+      lt_cv_prog_cc_pic='-KPIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      lt_cv_prog_cc_wl='-Wl,'
+      ;;
+
+    sunos4*)
+      lt_cv_prog_cc_pic='-PIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      lt_cv_prog_cc_wl='-Qoption ld '
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+      lt_cv_prog_cc_pic='-KPIC'
+      lt_cv_prog_cc_static='-Bstatic'
+      lt_cv_prog_cc_wl='-Wl,'
+      ;;
+
+    uts4*)
+      lt_cv_prog_cc_pic='-pic'
+      lt_cv_prog_cc_static='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+       lt_cv_prog_cc_pic='-Kconform_pic'
+       lt_cv_prog_cc_static='-Bstatic'
+      fi
+      ;;
+
+    *)
+      lt_cv_prog_cc_can_build_shared=no
+      ;;
+    esac
+  fi
+
+fi
+
+if test -z "$lt_cv_prog_cc_pic"; then
+  echo "$ac_t""none" 1>&6
+else
+  echo "$ac_t""$lt_cv_prog_cc_pic" 1>&6
+
+  # Check to make sure the pic_flag actually works.
+  echo $ac_n "checking if $compiler PIC flag $lt_cv_prog_cc_pic works""... $ac_c" 1>&6
+echo "configure:3638: checking if $compiler PIC flag $lt_cv_prog_cc_pic works" >&5
+  if eval "test \"`echo '$''{'lt_cv_prog_cc_pic_works'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+      save_CFLAGS="$CFLAGS"
+    CFLAGS="$CFLAGS $lt_cv_prog_cc_pic -DPIC"
+    cat > conftest.$ac_ext <<EOF
+#line 3645 "configure"
+#include "confdefs.h"
+
+int main() {
+
+; return 0; }
+EOF
+if { (eval echo configure:3652: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+  rm -rf conftest*
+        case $host_os in
+      hpux9* | hpux10* | hpux11*)
+       # On HP-UX, both CC and GCC only warn that PIC is supported... then
+       # they create non-PIC objects.  So, if there were any warnings, we
+       # assume that PIC is not supported.
+       if test -s conftest.err; then
+         lt_cv_prog_cc_pic_works=no
+       else
+         lt_cv_prog_cc_pic_works=yes
+       fi
+       ;;
+      *)
+       lt_cv_prog_cc_pic_works=yes
+       ;;
+      esac
+    
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+        lt_cv_prog_cc_pic_works=no
+    
+fi
+rm -f conftest*
+    CFLAGS="$save_CFLAGS"
+  
+fi
+
+
+  if test "X$lt_cv_prog_cc_pic_works" = Xno; then
+    lt_cv_prog_cc_pic=
+    lt_cv_prog_cc_can_build_shared=no
+  else
+    lt_cv_prog_cc_pic=" $lt_cv_prog_cc_pic"
+  fi
+
+  echo "$ac_t""$lt_cv_prog_cc_pic_works" 1>&6
+fi
+
+# Check for any special shared library compilation flags.
+if test -n "$lt_cv_prog_cc_shlib"; then
+  echo "configure: warning: \`$CC' requires \`$lt_cv_prog_cc_shlib' to build shared libraries" 1>&2
+  if echo "$old_CC $old_CFLAGS " | egrep -e "[         ]$lt_cv_prog_cc_shlib[  ]" >/dev/null; then :
+  else
+   echo "configure: warning: add \`$lt_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure" 1>&2
+    lt_cv_prog_cc_can_build_shared=no
+  fi
+fi
+
+echo $ac_n "checking if $compiler static flag $lt_cv_prog_cc_static works""... $ac_c" 1>&6
+echo "configure:3704: checking if $compiler static flag $lt_cv_prog_cc_static works" >&5
+if eval "test \"`echo '$''{'lt_cv_prog_cc_static_works'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+    lt_cv_prog_cc_static_works=no
+  save_LDFLAGS="$LDFLAGS"
+  LDFLAGS="$LDFLAGS $lt_cv_prog_cc_static"
+  cat > conftest.$ac_ext <<EOF
+#line 3712 "configure"
+#include "confdefs.h"
+
+int main() {
+
+; return 0; }
+EOF
+if { (eval echo configure:3719: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  lt_cv_prog_cc_static_works=yes
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+fi
+rm -f conftest*
+  LDFLAGS="$save_LDFLAGS"
+
+fi
+
+
+# Belt *and* braces to stop my trousers falling down:
+test "X$lt_cv_prog_cc_static_works" = Xno && lt_cv_prog_cc_static=
+echo "$ac_t""$lt_cv_prog_cc_static_works" 1>&6
+
+pic_flag="$lt_cv_prog_cc_pic"
+special_shlib_compile_flags="$lt_cv_prog_cc_shlib"
+wl="$lt_cv_prog_cc_wl"
+link_static_flag="$lt_cv_prog_cc_static"
+no_builtin_flag="$lt_cv_prog_cc_no_builtin"
+can_build_shared="$lt_cv_prog_cc_can_build_shared"
+
+
+# Check whether the compiler supports -c and -o together, compiling inside a scratch directory that is made read-only.
+echo $ac_n "checking if $compiler supports -c -o file.$ac_objext""... $ac_c" 1>&6
+echo "configure:3746: checking if $compiler supports -c -o file.$ac_objext" >&5
+if eval "test \"`echo '$''{'lt_cv_compiler_c_o'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  
+$rm -r conftest 2>/dev/null
+mkdir conftest
+cd conftest
+echo "int some_variable = 0;" > conftest.$ac_ext
+mkdir out
+# According to Tom Tromey, Ian Lance Taylor reported there are C compilers
+# that will create temporary files in the current directory regardless of
+# the output directory.  Thus, making CWD read-only will cause this test
+# to fail, enabling locking or at least warning the user not to do parallel
+# builds.
+chmod -w .
+save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -o out/conftest2.$ac_objext"
+compiler_c_o=no
+if { (eval echo configure:3765: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.$ac_objext; then
+  # A compiler may merely warn about an unrecognized option and ignore it,
+  # so treat any output captured in out/conftest.err as a failure.
+  if test -s out/conftest.err; then
+    lt_cv_compiler_c_o=no
+  else
+    lt_cv_compiler_c_o=yes
+  fi
+else
+  # Append any errors to the config.log.
+  cat out/conftest.err 1>&5
+  lt_cv_compiler_c_o=no
+fi
+CFLAGS="$save_CFLAGS"
+chmod u+w .
+$rm conftest* out/*
+rmdir out
+cd ..
+rmdir conftest
+$rm -r conftest 2>/dev/null
+
+fi
+
+compiler_c_o=$lt_cv_compiler_c_o
+echo "$ac_t""$compiler_c_o" 1>&6
+
+if test x"$compiler_c_o" = x"yes"; then
+  # Check to see if we can write to a .lo
+  echo $ac_n "checking if $compiler supports -c -o file.lo""... $ac_c" 1>&6
+echo "configure:3794: checking if $compiler supports -c -o file.lo" >&5
+  if eval "test \"`echo '$''{'lt_cv_compiler_o_lo'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  
+  lt_cv_compiler_o_lo=no
+  save_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -c -o conftest.lo"
+  save_objext="$ac_objext"
+  ac_objext=lo
+  cat > conftest.$ac_ext <<EOF
+#line 3805 "configure"
+#include "confdefs.h"
+
+int main() {
+int some_variable = 0;
+; return 0; }
+EOF
+if { (eval echo configure:3812: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+  rm -rf conftest*
+      # The compiler can only warn and ignore the option if not recognized
+    # So say no if there are warnings
+    if test -s conftest.err; then
+      lt_cv_compiler_o_lo=no
+    else
+      lt_cv_compiler_o_lo=yes
+    fi
+  
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+fi
+rm -f conftest*
+  ac_objext="$save_objext"
+  CFLAGS="$save_CFLAGS"
+  
+fi
+
+  compiler_o_lo=$lt_cv_compiler_o_lo
+  echo "$ac_t""$compiler_o_lo" 1>&6
+else
+  compiler_o_lo=no
+fi
+
+# Check to see if we can do hard links to lock some files if needed
+hard_links="nottested"
+if test "$compiler_c_o" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  echo $ac_n "checking if we can lock with hard links""... $ac_c" 1>&6
+echo "configure:3843: checking if we can lock with hard links" >&5
+  hard_links=yes
+  $rm conftest*
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  echo "$ac_t""$hard_links" 1>&6
+  if test "$hard_links" = no; then
+    echo "configure: warning: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" 1>&2
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+if test "$GCC" = yes; then
+  # Check to see if options -fno-rtti -fno-exceptions are supported by compiler
+  echo $ac_n "checking if $compiler supports -fno-rtti -fno-exceptions""... $ac_c" 1>&6
+echo "configure:3862: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  save_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.$ac_ext"
+  compiler_rtti_exceptions=no
+  cat > conftest.$ac_ext <<EOF
+#line 3868 "configure"
+#include "confdefs.h"
+
+int main() {
+int some_variable = 0;
+; return 0; }
+EOF
+if { (eval echo configure:3875: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+  rm -rf conftest*
+      # The compiler can only warn and ignore the option if not recognized
+    # So say no if there are warnings
+    if test -s conftest.err; then
+      compiler_rtti_exceptions=no
+    else
+      compiler_rtti_exceptions=yes
+    fi
+  
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+fi
+rm -f conftest*
+  CFLAGS="$save_CFLAGS"
+  echo "$ac_t""$compiler_rtti_exceptions" 1>&6
+
+  if test "$compiler_rtti_exceptions" = "yes"; then
+    no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions'
+  else
+    no_builtin_flag=' -fno-builtin'
+  fi
+fi
+
+# See if the linker supports building shared libraries.  Every linker-related setting is first reset to a default here.
+echo $ac_n "checking whether the linker ($LD) supports shared libraries""... $ac_c" 1>&6
+echo "configure:3902: checking whether the linker ($LD) supports shared libraries" >&5
+
+allow_undefined_flag=
+no_undefined_flag=
+need_lib_prefix=unknown
+need_version=unknown
+# When you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments.
+archive_cmds=
+archive_expsym_cmds=
+old_archive_from_new_cmds=
+old_archive_from_expsyms_cmds=
+export_dynamic_flag_spec=
+whole_archive_flag_spec=
+thread_safe_flag_spec=
+hardcode_into_libs=no
+hardcode_libdir_flag_spec=
+hardcode_libdir_separator=
+hardcode_direct=no
+hardcode_minus_L=no
+hardcode_shlibpath_var=unsupported
+runpath_var=
+link_all_deplibs=unknown
+always_export_symbols=no
+export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols'
+# include_expsyms should be a list of space-separated symbols to be *always*
+# included in the symbol list.
+include_expsyms=
+# exclude_expsyms can be an egrep regular expression of symbols to exclude.
+# It will be wrapped by ` (' and `)$', so one must not match beginning or
+# end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+# as well as any symbol that contains `d'.
+exclude_expsyms="_GLOBAL_OFFSET_TABLE_"
+# Although _GLOBAL_OFFSET_TABLE_ is a valid C symbol name, most a.out
+# platforms (ab)use it in PIC code, but their linkers get confused if
+# the symbol is explicitly referenced.  Since portable code cannot
+# rely on this symbol name, it's probably fine to never include it in
+# preloaded symbol tables.
+extract_expsyms_cmds=
+
+case $host_os in
+cygwin* | mingw* | pw32*)
+  # FIXME: the MSVC++ port hasn't been tested in a long time.
+  # When not using gcc, we currently assume that we are using
+  # Microsoft Visual C++, so the GNU ld code path is switched off.
+  if test "$GCC" != yes; then
+    with_gnu_ld=no
+  fi
+  ;;
+openbsd*)
+  with_gnu_ld=no
+  ;;
+esac
+
+ld_shlibs=yes
+if test "$with_gnu_ld" = yes; then
+  # If archive_cmds runs LD, not CC, wlarc should be empty
+  wlarc='${wl}'
+
+  # See if GNU ld supports shared libraries.
+  case $host_os in
+  aix3* | aix4* | aix5*)
+    # On AIX, the GNU linker is very broken.
+    # Note: check the GNU linker on AIX 5-IA64 when/if it becomes available.
+    ld_shlibs=no
+    cat <<EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.9.1, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to modify your PATH
+*** so that a non-GNU linker is found, and then restart.
+
+EOF
+    ;;
+
+  amigaos*)
+    archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_minus_L=yes
+
+    # Samuel A. Falvo II <kc5tja@dolphin.openprojects.net> reports
+    # that the semantics of dynamic libraries on AmigaOS, at least up
+    # to version 4, is to share data among multiple programs linked
+    # with the same dynamic library.  Since this doesn't match the
+    # behavior of shared libraries on other platforms, we cannot use
+    # them.
+    ld_shlibs=no
+    ;;
+
+  beos*)
+    if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+      allow_undefined_flag=unsupported
+      # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+      # support --undefined.  This deserves some investigation.  FIXME
+      archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+    else
+      ld_shlibs=no
+    fi
+    ;;
+
+  cygwin* | mingw* | pw32*)
+    # hardcode_libdir_flag_spec is actually meaningless, as there is
+    # no search path for DLLs.
+    hardcode_libdir_flag_spec='-L$libdir'
+    allow_undefined_flag=unsupported
+    always_export_symbols=yes
+
+    extract_expsyms_cmds='test -f $output_objdir/impgen.c || \
+      sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //;s/^# *$//; p; }" -e d < $''0 > $output_objdir/impgen.c~
+      test -f $output_objdir/impgen.exe || (cd $output_objdir && \
+      if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \
+      else $CC -o impgen impgen.c ; fi)~
+      $output_objdir/impgen $dir/$soroot > $output_objdir/$soname-def'
+
+    old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib'
+
+    # cygwin and mingw dlls have different entry points and sets of symbols
+    # to exclude.
+    # FIXME: what about values for MSVC?
+    dll_entry=__cygwin_dll_entry@12
+    dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~
+    case $host_os in
+    mingw*)
+      # mingw values
+      dll_entry=_DllMainCRTStartup@12
+      dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~
+      ;;
+    esac
+
+    # mingw and cygwin differ, and it's simplest to just exclude the union
+    # of the two symbol sets.
+    dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12
+
+    # recent cygwin and mingw systems supply a stub DllMain which the user
+    # can override, but on older systems we have to supply one (in ltdll.c)
+    if test "x$lt_cv_need_dllmain" = "xyes"; then
+      ltdll_obj='$output_objdir/$soname-ltdll.'"$ac_objext "
+      ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $''0 > $output_objdir/$soname-ltdll.c~
+       test -f $output_objdir/$soname-ltdll.$ac_objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~'
+    else
+      ltdll_obj=
+      ltdll_cmds=
+    fi
+
+    # Extract the symbol export list from an `--export-all' def file,
+    # then regenerate the def file from the symbol export list, so that
+    # the compiled dll only exports the symbol export list.
+    # Be careful not to strip the DATA tag left by newer dlltools.
+    export_symbols_cmds="$ltdll_cmds"'
+      $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~
+      sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols'
+
+    # If the export-symbols file already is a .def file (1st line
+    # is EXPORTS), use it as is.
+    # If DATA tags from a recent dlltool are present, honour them!
+    archive_expsym_cmds='if test "x`sed 1q $export_symbols`" = xEXPORTS; then
+       cp $export_symbols $output_objdir/$soname-def;
+      else
+       echo EXPORTS > $output_objdir/$soname-def;
+       _lt_hint=1;
+       cat $export_symbols | while read symbol; do
+        set dummy \$symbol;
+        case \$# in
+          2) echo "   \$2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;;
+          4) echo "   \$2 \$3 \$4 ; " >> $output_objdir/$soname-def; _lt_hint=`expr \$_lt_hint - 1`;;
+          *) echo "     \$2 @ \$_lt_hint \$3 ; " >> $output_objdir/$soname-def;;
+        esac;
+        _lt_hint=`expr 1 + \$_lt_hint`;
+       done;
+      fi~
+      '"$ltdll_cmds"'
+      $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+      $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~
+      $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+      $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp --output-lib $output_objdir/$libname.dll.a~
+      $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags'
+    ;;
+
+  netbsd*)
+    if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+      archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+      wlarc=
+    else
+      archive_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      archive_expsym_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+    fi
+    ;;
+
+  solaris* | sysv5*)
+    if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then
+      ld_shlibs=no
+      cat <<EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+EOF
+    elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+    else
+      ld_shlibs=no
+    fi
+    ;;
+
+  sunos4*)
+    archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+    wlarc=
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  *)
+    if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+    else
+      ld_shlibs=no
+    fi
+    ;;
+  esac
+
+  if test "$ld_shlibs" = yes; then
+    runpath_var=LD_RUN_PATH
+    hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir'
+    export_dynamic_flag_spec='${wl}--export-dynamic'
+    case $host_os in
+    cygwin* | mingw* | pw32*)
+      # dlltool doesn't understand --whole-archive et al.
+      whole_archive_flag_spec=
+      ;;
+    *)
+      # ancient GNU ld didn't support --whole-archive et al.
+      if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then
+       whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+      else
+       whole_archive_flag_spec=
+      fi
+      ;;
+    esac
+  fi
+else
+  # PORTME fill in a description of your system's linker (not GNU ld)
+  case $host_os in
+  aix3*)
+    allow_undefined_flag=unsupported
+    always_export_symbols=yes
+    archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+    # Note: this linker hardcodes the directories in LIBPATH if there
+    # are no directories specified by -L.
+    hardcode_minus_L=yes
+    if test "$GCC" = yes && test -z "$link_static_flag"; then
+      # Neither direct hardcoding nor static linking is supported with a
+      # broken collect2.
+      hardcode_direct=unsupported
+    fi
+    ;;
+
+  aix4* | aix5*)
+    if test "$host_cpu" = ia64; then
+      # On IA64, the linker does run time linking by default, so we don't
+      # have to do anything special.
+      aix_use_runtimelinking=no
+      exp_sym_flag='-Bexport'
+      no_entry_flag=""
+    else
+      aix_use_runtimelinking=no
+
+      # Test if we are trying to use run time linking or normal
+      # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+      # need to do runtime linking.
+      case $host_os in aix4.[23]|aix4.[23].*|aix5*)
+       for ld_flag in $LDFLAGS; do
+         case $ld_flag in
+         *-brtl*)
+           aix_use_runtimelinking=yes
+           break
+         ;;
+         esac
+       done
+      esac
+
+      exp_sym_flag='-bexport'
+      no_entry_flag='-bnoentry'
+    fi
+
+    # When large executables or shared objects are built, AIX ld can
+    # have problems creating the table of contents.  If linking a library
+    # or program results in "error TOC overflow" add -mminimal-toc to
+    # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+    # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+    hardcode_direct=yes
+    archive_cmds=''
+    hardcode_libdir_separator=':'
+    if test "$GCC" = yes; then
+      case $host_os in aix4.[012]|aix4.[012].*)
+       collect2name=`${CC} -print-prog-name=collect2`
+       if test -f "$collect2name" && \
+         strings "$collect2name" | grep resolve_lib_name >/dev/null
+       then
+         # We have reworked collect2
+         hardcode_direct=yes
+       else
+         # We have old collect2
+         hardcode_direct=unsupported
+         # It fails to find uninstalled libraries when the uninstalled
+         # path is not listed in the libpath.  Setting hardcode_minus_L
+         # to yes here (hardcode_direct is already unsupported) forces relinking
+         hardcode_minus_L=yes
+         hardcode_libdir_flag_spec='-L$libdir'
+         hardcode_libdir_separator=
+       fi
+      esac
+
+      shared_flag='-shared'
+    else
+      # not using gcc
+      if test "$host_cpu" = ia64; then
+       shared_flag='${wl}-G'
+      else
+       if test "$aix_use_runtimelinking" = yes; then
+         shared_flag='${wl}-G'
+       else
+         shared_flag='${wl}-bM:SRE'
+       fi
+      fi
+    fi
+
+    # It seems that -bexpall can do strange things, so it is better to
+    # generate a list of symbols to export.
+    always_export_symbols=yes
+    if test "$aix_use_runtimelinking" = yes; then
+      # Warning - without using the other runtime loading flags (-brtl),
+      # -berok will link without error, but may produce a broken library.
+      allow_undefined_flag='-berok'
+      hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:/usr/lib:/lib'
+      archive_expsym_cmds="\$CC"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+    else
+      if test "$host_cpu" = ia64; then
+       hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+       allow_undefined_flag="-z nodefs"
+       archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"
+      else
+       hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib'
+       # Warning - without using the other run time loading flags,
+       # -berok will link without error, but may produce a broken library.
+       allow_undefined_flag='${wl}-berok'
+       # This is a bit strange, but is similar to how AIX traditionally builds
+       # its shared libraries.
+       archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname'
+      fi
+    fi
+    ;;
+
+  amigaos*)
+    archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_minus_L=yes
+    # see comment about different semantics on the GNU ld section
+    ld_shlibs=no
+    ;;
+
+  cygwin* | mingw* | pw32*)
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    # hardcode_libdir_flag_spec is actually meaningless, as there is
+    # no search path for DLLs.
+    hardcode_libdir_flag_spec=' '
+    allow_undefined_flag=unsupported
+    # Tell ltmain to make .lib files, not .a files.
+    libext=lib
+    # FIXME: Setting linknames here is a bad hack.
+    archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames='
+    # The linker will automatically build a .lib file if we build a DLL.
+    old_archive_from_new_cmds='true'
+    # FIXME: Should let the user specify the lib program.
+    old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs'
+    fix_srcfile_path='`cygpath -w "$srcfile"`'
+    ;;
+
+  darwin* | rhapsody*)
+    case "$host_os" in
+    rhapsody* | darwin1.[012])
+      allow_undefined_flag='-undefined suppress'
+      ;;
+    *) # Darwin 1.3 on
+      allow_undefined_flag='-flat_namespace -undefined suppress'
+      ;;
+    esac
+    # FIXME: Relying on posixy $() will cause problems for
+    #        cross-compilation, but unfortunately the echo tests do not
+    #        yet detect zsh echo's removal of \ escapes.  Also zsh mangles
+    #       `"' quotes if we put them in here... so don't!
+    archive_cmds='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs && $CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib ${lib}-master.o $deplibs$linker_flags $(test .$module != .yes && echo -install_name $rpath/$soname $verstring)'
+    # We need to add '_' to the symbols in $export_symbols first
+    #archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    whole_archive_flag_spec='-all_load $convenience'
+    ;;
+
+  freebsd1*)
+    ld_shlibs=no
+    ;;
+
+  # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+  # support.  Future versions do this automatically, but an explicit c++rt0.o
+  # does not break anything, and helps significantly (at the cost of a little
+  # extra space).
+  freebsd2.2*)
+    archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+  freebsd2*)
+    archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_direct=yes
+    hardcode_minus_L=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+  freebsd*)
+    archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  hpux9* | hpux10* | hpux11*)
+    case $host_os in
+    hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;;
+    *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;;
+    esac
+    hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+    hardcode_libdir_separator=:
+    hardcode_direct=yes
+    hardcode_minus_L=yes # Not in the search PATH, but as the default
+                        # location of the library.
+    export_dynamic_flag_spec='${wl}-E'
+    ;;
+
+  irix5* | irix6* | nonstopux*)
+    if test "$GCC" = yes; then
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    else
+      archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+      hardcode_libdir_flag_spec='-rpath $libdir'
+    fi
+    hardcode_libdir_separator=:
+    link_all_deplibs=yes
+    ;;
+
+  netbsd*)
+    if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
+    else
+      archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
+    fi
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  newsos6)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_direct=yes
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    hardcode_libdir_separator=:
+    hardcode_shlibpath_var=no
+    ;;
+
+  openbsd*)
+    hardcode_direct=yes
+    hardcode_shlibpath_var=no
+    if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+      archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+      hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+      export_dynamic_flag_spec='${wl}-E'
+    else
+      case "$host_os" in
+      openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+       archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+       hardcode_libdir_flag_spec='-R$libdir'
+        ;;
+      *)
+        archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+        hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+        ;;
+      esac
+    fi
+    ;;
+
+  os2*)
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_minus_L=yes
+    allow_undefined_flag=unsupported
+    archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+    old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+    ;;
+
+  osf3*)
+    if test "$GCC" = yes; then
+      allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+      archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+    else
+      allow_undefined_flag=' -expect_unresolved \*'
+      archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+    fi
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    hardcode_libdir_separator=:
+    ;;
+
+  osf4* | osf5*)       # as osf3* with the addition of -msym flag
+    if test "$GCC" = yes; then
+      allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+      archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    else
+      allow_undefined_flag=' -expect_unresolved \*'
+      archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+      archive_expsym_cmds='for i in `cat $export_symbols`; do printf "-exported_symbol " >> $lib.exp; echo "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~
+      $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${objdir}/so_locations -o $lib~$rm $lib.exp'
+
+      #Both c and cxx compiler support -rpath directly
+      hardcode_libdir_flag_spec='-rpath $libdir'
+    fi
+    hardcode_libdir_separator=:
+    ;;
+
+  sco3.2v5*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_shlibpath_var=no
+    runpath_var=LD_RUN_PATH
+    hardcode_runpath_var=yes
+    export_dynamic_flag_spec='${wl}-Bexport'
+    ;;
+
+  solaris*)
+    # gcc --version < 3.0 without binutils cannot create self contained
+    # shared libraries reliably, requiring libgcc.a to resolve some of
+    # the object symbols generated in some cases.  Libraries that use
+    # assert need libgcc.a to resolve __eprintf, for example.  Linking
+    # a copy of libgcc.a into every shared library to guarantee resolving
+    # such symbols causes other problems:  According to Tim Van Holder
+    # <tim.van.holder@pandora.be>, C++ libraries end up with a separate
+    # (to the application) exception stack for one thing.
+    no_undefined_flag=' -z defs'
+    if test "$GCC" = yes; then
+      case `$CC --version 2>/dev/null` in
+      [12].*)
+       cat <<EOF 1>&2
+
+*** Warning: Releases of GCC earlier than version 3.0 cannot reliably
+*** create self contained shared libraries on Solaris systems, without
+*** introducing a dependency on libgcc.a.  Therefore, libtool is disabling
+*** -no-undefined support, which will at least allow you to build shared
+*** libraries.  However, you may find that when you link such libraries
+*** into an application without using GCC, you have to manually add
+*** \`gcc --print-libgcc-file-name\` to the link command.  We urge you to
+*** upgrade to a newer version of GCC.  Another option is to rebuild your
+*** current GCC to use the GNU linker from GNU binutils 2.9.1 or newer.
+
+EOF
+        no_undefined_flag=
+       ;;
+      esac
+    fi
+    # $CC -shared without GNU ld will not create a library from C++
+    # object files and a static libstdc++, better avoid it by now
+    archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+               $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+    hardcode_libdir_flag_spec='-R$libdir'
+    hardcode_shlibpath_var=no
+    case $host_os in
+    solaris2.[0-5] | solaris2.[0-5].*) ;;
+    *) # Supported since Solaris 2.6 (maybe 2.5.1?)
+      whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;;
+    esac
+    link_all_deplibs=yes
+    ;;
+
+  sunos4*)
+    if test "x$host_vendor" = xsequent; then
+      # Use $CC to link under sequent, because it throws in some extra .o
+      # files that make .init and .fini sections work.
+      archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+    else
+      archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+    fi
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_direct=yes
+    hardcode_minus_L=yes
+    hardcode_shlibpath_var=no
+    ;;
+
+  sysv4)
+    case $host_vendor in
+      sni)
+        archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+        hardcode_direct=yes # is this really true???
+        ;;
+      siemens)
+        ## LD is ld it makes a PLAMLIB
+        ## CC just makes a GrossModule.
+        archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+        reload_cmds='$CC -r -o $output$reload_objs'
+        hardcode_direct=no
+        ;;
+      motorola)
+        archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+        hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+        ;;
+    esac
+    runpath_var='LD_RUN_PATH'
+    hardcode_shlibpath_var=no
+    ;;
+
+  sysv4.3*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_shlibpath_var=no
+    export_dynamic_flag_spec='-Bexport'
+    ;;
+
+  sysv5*)
+    no_undefined_flag=' -z text'
+    # $CC -shared without GNU ld will not create a library from C++
+    # object files and a static libstdc++, better avoid it by now
+    archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+               $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+    hardcode_libdir_flag_spec=
+    hardcode_shlibpath_var=no
+    runpath_var='LD_RUN_PATH'
+    ;;
+
+  uts4*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_shlibpath_var=no
+    ;;
+
+  dgux*)
+    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_libdir_flag_spec='-L$libdir'
+    hardcode_shlibpath_var=no
+    ;;
+
+  sysv4*MP*)
+    if test -d /usr/nec; then
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_shlibpath_var=no
+      runpath_var=LD_RUN_PATH
+      hardcode_runpath_var=yes
+      ld_shlibs=yes
+    fi
+    ;;
+
+  sysv4.2uw2*)
+    archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+    hardcode_direct=yes
+    hardcode_minus_L=no
+    hardcode_shlibpath_var=no
+    hardcode_runpath_var=yes
+    runpath_var=LD_RUN_PATH
+    ;;
+
+  sysv5uw7* | unixware7*)
+    no_undefined_flag='${wl}-z ${wl}text'
+    if test "$GCC" = yes; then
+      archive_cmds='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+    else
+      archive_cmds='$CC -G ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+    fi
+    runpath_var='LD_RUN_PATH'
+    hardcode_shlibpath_var=no
+    ;;
+
+  *)
+    ld_shlibs=no
+    ;;
+  esac
+fi
+echo "$ac_t""$ld_shlibs" 1>&6
+test "$ld_shlibs" = no && can_build_shared=no
+
+# Check hardcoding attributes.
+echo $ac_n "checking how to hardcode library paths into programs""... $ac_c" 1>&6
+echo "configure:4601: checking how to hardcode library paths into programs" >&5
+hardcode_action=
+if test -n "$hardcode_libdir_flag_spec" || \
+   test -n "$runpath_var"; then
+
+  # We can hardcode non-existant directories.
+  if test "$hardcode_direct" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$hardcode_shlibpath_var" != no &&
+     test "$hardcode_minus_L" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action=unsupported
+fi
+echo "$ac_t""$hardcode_action" 1>&6
+
+striplib=
+old_striplib=
+echo $ac_n "checking whether stripping libraries is possible""... $ac_c" 1>&6
+echo "configure:4629: checking whether stripping libraries is possible" >&5
+if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then
+  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+  echo "$ac_t""yes" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+# PORTME Fill in your ld.so characteristics
+echo $ac_n "checking dynamic linker characteristics""... $ac_c" 1>&6
+echo "configure:4643: checking dynamic linker characteristics" >&5
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+
+case $host_os in
+aix3*)
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}.so$major'
+  ;;
+
+aix4* | aix5*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}.so$major ${libname}${release}.so$versuffix $libname.so'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[01] | aix4.[01].*)
+       if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+            echo ' yes '
+            echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
+         :
+       else
+         can_build_shared=no
+       fi
+       ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can
+    # not hardcode correct soname into executable. Probably we can
+    # add versioning support to collect2, so additional links can
+    # be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}.so$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  hardcode_into_libs=yes
+  ;;
+
+amigaos*)
+  library_names_spec='$libname.ixlibrary $libname.a'
+  # Create ${libname}_ixlibrary.a entries in /sys/libs.
+  finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
+  ;;
+
+beos*)
+  library_names_spec='${libname}.so'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi4*)
+  version_type=linux
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  export_dynamic_flag_spec=-rdynamic
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32*)
+  version_type=windows
+  need_version=no
+  need_lib_prefix=no
+  case $GCC,$host_os in
+  yes,cygwin*)
+    library_names_spec='$libname.dll.a'
+    soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll'
+    postinstall_cmds='dlpath=`bash 2>&1 -c '\''. $dir/${file}i;echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog .libs/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`bash 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $rm \$dlpath'
+    ;;
+  yes,mingw*)
+    library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll'
+    sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | sed -e "s/^libraries://" -e "s/;/ /g" -e "s,=/,/,g"`
+    ;;
+  yes,pw32*)
+    # Build the DLL name: swap a leading "lib" for "pw" and turn each dot in
+    # the release string into a dash.  The dot is written as [.] so that sed
+    # treats it literally (as the cygwin and mingw arms do); a bare . matches
+    # any character and would turn the whole of $release into dashes.
+    library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll'
+    ;;
+  *)
+    library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll $libname.lib'
+    ;;
+  esac
+  dynamic_linker='Win32 ld.exe'
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  # FIXME: Relying on posixy $() will cause problems for
+  #        cross-compilation, but unfortunately the echo tests do not
+  #        yet detect zsh echo's removal of \ escapes.
+  library_names_spec='${libname}${release}${versuffix}.$(test .$module = .yes && echo so || echo dylib) ${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib) ${libname}.$(test .$module = .yes && echo so || echo dylib)'
+  soname_spec='${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib)'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  ;;
+
+freebsd1*)
+  dynamic_linker=no
+  ;;
+
+freebsd*)
+  objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout`
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in
+  freebsd2*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  *)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  dynamic_linker="$host_os dld.sl"
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  shlibpath_var=SHLIB_PATH
+  shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+  library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl'
+  soname_spec='${libname}${release}.sl$major'
+  # HP-UX runs *really* slowly unless shared libraries are mode 555.
+  postinstall_cmds='chmod 555 $lib'
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)          version_type=irix ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}.so$major'
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so $libname.so'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*)
+  dynamic_linker=no
+  ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+
+  # Find out which ABI we are using (multilib Linux x86_64 hack).
+  libsuff=
+  case "$host_cpu" in
+  x86_64*|s390x*)
+    echo '#line 4902 "configure"' > conftest.$ac_ext
+    if { (eval echo configure:4903: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+      case `/usr/bin/file conftest.$ac_objext` in
+      *64-bit*)
+        libsuff=64
+        ;;
+      esac
+    fi
+    rm -rf conftest*
+    ;;
+  *)
+    ;;
+  esac
+  sys_lib_dlsearch_path_spec="/lib${libsuff} /usr/lib${libsuff}"
+  sys_lib_search_path_spec="/lib${libsuff} /usr/lib${libsuff} /usr/local/lib${libsuff}"
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+    library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so'
+    soname_spec='${libname}${release}.so$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+openbsd*)  # OpenBSD: sunos-style versioning; only shlibpath_overrides_runpath varies
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then  # empty grep output => __ELF__ was macro-expanded; openbsd2.8-powerpc is forced into this branch
+    case "$host_os" in
+    openbsd2.[89] | openbsd2.[89].*)  # releases 2.8 and 2.9 (with or without a patch level)
+      shlibpath_overrides_runpath=no
+      ;;
+    *)
+      shlibpath_overrides_runpath=yes
+      ;;
+    esac
+  else  # __ELF__ passed through unexpanded, i.e. it is not a defined macro
+    shlibpath_overrides_runpath=yes
+  fi
+  library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+os2*)
+  libname_spec='$name'
+  need_lib_prefix=no
+  library_names_spec='$libname.dll $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_version=no
+  soname_spec='${libname}${release}.so$major'
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  hardcode_into_libs=yes
+  ;;
+
+sco3.2v5*)
+  version_type=osf
+  soname_spec='${libname}${release}.so$major'
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+solaris*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)  # SunOS 4: sunos-style versioned library names, ldconfig run from /usr/etc
+  version_type=sunos
+  library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'  # \$PATH is left unexpanded here so it is resolved when the command is eventually run
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then  # the lib prefix requirement is dropped only when GNU ld is in use
+    need_lib_prefix=no
+  fi
+  need_version=yes  # set explicitly, unlike most other branches which set it to no or leave it alone
+  ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)  # System V R4/R5 family: linux-style naming plus per-vendor overrides
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in  # vendors not listed below keep the settings made above
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      export_dynamic_flag_spec='${wl}-Blargedynsym'
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'  # replaces the compile-time library search path for this vendor
+      ;;
+  esac
+  ;;
+
+uts4*)
+  version_type=linux
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+dgux*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+  soname_spec='${libname}${release}.so$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux
+    library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so'
+    soname_spec='$libname.so.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+*)  # host OS matched none of the patterns above
+  dynamic_linker=no
+  ;;
+esac
+echo "$ac_t""$dynamic_linker" 1>&6  # print the detected dynamic linker description (or "no") as the check result
+test "$dynamic_linker" = no && can_build_shared=no  # no known dynamic linker => shared libraries cannot be built
+
+# Report the final consequences: whether shared libraries are possible, then which of shared/static will actually be built.
+echo $ac_n "checking if libtool supports shared libraries""... $ac_c" 1>&6
+echo "configure:5074: checking if libtool supports shared libraries" >&5
+echo "$ac_t""$can_build_shared" 1>&6
+
+echo $ac_n "checking whether to build shared libraries""... $ac_c" 1>&6
+echo "configure:5078: checking whether to build shared libraries" >&5
+test "$can_build_shared" = "no" && enable_shared=no  # a request for shared libraries is overridden when they cannot be built
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case "$host_os" in
+aix3*)
+  test "$enable_shared" = yes && enable_static=no  # building shared turns static off on this host
+  if test -n "$RANLIB"; then
+    archive_cmds="$archive_cmds~\$RANLIB \$lib"  # append a ranlib step; the escaped \$ keeps RANLIB/lib unexpanded until the command is run
+    postinstall_cmds='$RANLIB $lib'
+  fi
+  ;;
+
+aix4*)
+  if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then  # static is turned off only for non-ia64 hosts without runtime linking
+    test "$enable_shared" = yes && enable_static=no
+  fi
+  ;;
+esac
+echo "$ac_t""$enable_shared" 1>&6
+
+echo $ac_n "checking whether to build static libraries""... $ac_c" 1>&6
+echo "configure:5101: checking whether to build static libraries" >&5
+# Make sure either enable_shared or enable_static is yes: static is forced on whenever shared is not enabled.
+test "$enable_shared" = yes || enable_static=yes
+echo "$ac_t""$enable_static" 1>&6
+
+if test "$hardcode_action" = relink; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "x$enable_dlopen" != xyes; then
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  cygwin* | mingw* | pw32*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+   ;;
+
+  *)
+    echo $ac_n "checking for shl_load""... $ac_c" 1>&6
+echo "configure:5142: checking for shl_load" >&5
+if eval "test \"`echo '$''{'ac_cv_func_shl_load'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  cat > conftest.$ac_ext <<EOF
+#line 5147 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char shl_load(); below.  */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error.  */
+/* We use char because int might match the return type of a gcc2
+    builtin and then its argument prototype would still apply.  */
+char shl_load();
+
+int main() {
+
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined (__stub_shl_load) || defined (__stub___shl_load)
+choke me
+#else
+shl_load();
+#endif
+
+; return 0; }
+EOF
+if { (eval echo configure:5170: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  eval "ac_cv_func_shl_load=yes"
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  eval "ac_cv_func_shl_load=no"
+fi
+rm -f conftest*
+fi
+
+if eval "test \"`echo '$ac_cv_func_'shl_load`\" = yes"; then
+  echo "$ac_t""yes" 1>&6
+  lt_cv_dlopen="shl_load"
+else
+  echo "$ac_t""no" 1>&6
+echo $ac_n "checking for shl_load in -ldld""... $ac_c" 1>&6
+echo "configure:5188: checking for shl_load in -ldld" >&5
+ac_lib_var=`echo dld'_'shl_load | sed 'y%./+-%__p_%'`
+if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  ac_save_LIBS="$LIBS"
+LIBS="-ldld  $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 5196 "configure"
+#include "confdefs.h"
+/* Override any gcc2 internal prototype to avoid an error.  */
+/* We use char because int might match the return type of a gcc2
+    builtin and then its argument prototype would still apply.  */
+char shl_load();
+
+int main() {
+shl_load()
+; return 0; }
+EOF
+if { (eval echo configure:5207: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=yes"
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=no"
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then
+  echo "$ac_t""yes" 1>&6
+  lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"  # shl_load was found by linking with -ldld, so record that exact flag ("-dld" is not a library option)
+else
+  echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dlopen""... $ac_c" 1>&6
+echo "configure:5226: checking for dlopen" >&5
+if eval "test \"`echo '$''{'ac_cv_func_dlopen'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  cat > conftest.$ac_ext <<EOF
+#line 5231 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char dlopen(); below.  */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error.  */
+/* We use char because int might match the return type of a gcc2
+    builtin and then its argument prototype would still apply.  */
+char dlopen();
+
+int main() {
+
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined (__stub_dlopen) || defined (__stub___dlopen)
+choke me
+#else
+dlopen();
+#endif
+
+; return 0; }
+EOF
+if { (eval echo configure:5254: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  eval "ac_cv_func_dlopen=yes"
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  eval "ac_cv_func_dlopen=no"
+fi
+rm -f conftest*
+fi
+
+if eval "test \"`echo '$ac_cv_func_'dlopen`\" = yes"; then
+  echo "$ac_t""yes" 1>&6
+  lt_cv_dlopen="dlopen"
+else
+  echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dlopen in -ldl""... $ac_c" 1>&6
+echo "configure:5272: checking for dlopen in -ldl" >&5
+ac_lib_var=`echo dl'_'dlopen | sed 'y%./+-%__p_%'`
+if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  ac_save_LIBS="$LIBS"
+LIBS="-ldl  $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 5280 "configure"
+#include "confdefs.h"
+/* Override any gcc2 internal prototype to avoid an error.  */
+/* We use char because int might match the return type of a gcc2
+    builtin and then its argument prototype would still apply.  */
+char dlopen();
+
+int main() {
+dlopen()
+; return 0; }
+EOF
+if { (eval echo configure:5291: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=yes"
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=no"
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then
+  echo "$ac_t""yes" 1>&6
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else
+  echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dlopen in -lsvld""... $ac_c" 1>&6
+echo "configure:5310: checking for dlopen in -lsvld" >&5
+ac_lib_var=`echo svld'_'dlopen | sed 'y%./+-%__p_%'`
+if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  ac_save_LIBS="$LIBS"
+LIBS="-lsvld  $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 5318 "configure"
+#include "confdefs.h"
+/* Override any gcc2 internal prototype to avoid an error.  */
+/* We use char because int might match the return type of a gcc2
+    builtin and then its argument prototype would still apply.  */
+char dlopen();
+
+int main() {
+dlopen()
+; return 0; }
+EOF
+if { (eval echo configure:5329: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=yes"
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=no"
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then
+  echo "$ac_t""yes" 1>&6
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"
+else
+  echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dld_link in -ldld""... $ac_c" 1>&6
+echo "configure:5348: checking for dld_link in -ldld" >&5
+ac_lib_var=`echo dld'_'dld_link | sed 'y%./+-%__p_%'`
+if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  ac_save_LIBS="$LIBS"
+LIBS="-ldld  $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 5356 "configure"
+#include "confdefs.h"
+/* Override any gcc2 internal prototype to avoid an error.  */
+/* We use char because int might match the return type of a gcc2
+    builtin and then its argument prototype would still apply.  */
+char dld_link();
+
+int main() {
+dld_link()
+; return 0; }
+EOF
+if { (eval echo configure:5367: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=yes"
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  eval "ac_cv_lib_$ac_lib_var=no"
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then
+  echo "$ac_t""yes" 1>&6
+  lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"  # dld_link was found by linking with -ldld, so record that exact flag ("-dld" is not a library option)
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+             
+fi
+
+           
+fi
+
+         
+fi
+
+       
+fi
+
+      
+fi
+
+    ;;
+  esac
+
+  if test "x$lt_cv_dlopen" != xno; then
+    enable_dlopen=yes
+  else
+    enable_dlopen=no
+  fi
+
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS="$CPPFLAGS"
+        test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS="$LDFLAGS"
+    eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+    save_LIBS="$LIBS"
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    echo $ac_n "checking whether a program can dlopen itself""... $ac_c" 1>&6
+echo "configure:5423: checking whether a program can dlopen itself" >&5
+if eval "test \"`echo '$''{'lt_cv_dlopen_self'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+         if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self=cross
+else
+    lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<EOF
+#line 5433 "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+#ifdef __cplusplus
+extern "C" void exit (int);
+#endif
+
+void fnord() { int i=42;}
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
+      /* dlclose (self); */
+    }
+
+    exit (status);
+}
+EOF
+  if { (eval echo configure:5494: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) 2>/dev/null
+    lt_status=$?
+    case x$lt_status in  # map the test program's exit status onto yes/no
+      x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;;  # dlsym resolved "fnord"
+      x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;;  # dlsym resolved "_fnord"
+      x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;;  # status left at lt_dlunknown, or any other exit status (was the undefined $lt_unknown)
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self=no
+  fi
+fi
+rm -fr conftest*
+
+    
+fi
+
+echo "$ac_t""$lt_cv_dlopen_self" 1>&6
+
+    if test "x$lt_cv_dlopen_self" = xyes; then
+      LDFLAGS="$LDFLAGS $link_static_flag"
+      echo $ac_n "checking whether a statically linked program can dlopen itself""... $ac_c" 1>&6
+echo "configure:5517: checking whether a statically linked program can dlopen itself" >&5
+if eval "test \"`echo '$''{'lt_cv_dlopen_self_static'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+         if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self_static=cross
+else
+    lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<EOF
+#line 5527 "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+#ifdef __cplusplus
+extern "C" void exit (int);
+#endif
+
+void fnord() { int i=42;}
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
+      /* dlclose (self); */
+    }
+
+    exit (status);
+}
+EOF
+  if { (eval echo configure:5588: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) 2>/dev/null
+    lt_status=$?
+    case x$lt_status in  # map the statically linked test program's exit status onto yes/no
+      x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;;  # dlsym resolved "fnord"
+      x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;;  # dlsym resolved "_fnord"
+      x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;;  # status left at lt_dlunknown, or any other exit status (was the undefined $lt_unknown)
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self_static=no
+  fi
+fi
+rm -fr conftest*
+
+      
+fi
+
+echo "$ac_t""$lt_cv_dlopen_self_static" 1>&6
+    fi
+
+    CPPFLAGS="$save_CPPFLAGS"
+    LDFLAGS="$save_LDFLAGS"
+    LIBS="$save_LIBS"
+    ;;
+  esac
+
+  case $lt_cv_dlopen_self in  # pass a definite yes/no through; anything else becomes "unknown"
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;  # e.g. "cross" when cross-compiling, or unset when the self-dlopen test was not run
+  esac
+
+  case $lt_cv_dlopen_self_static in  # same mapping for the statically linked variant
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+
+
+if test "$enable_shared" = yes && test "$GCC" = yes; then
+  case $archive_cmds in
+  *'~'*)
+    # FIXME: we may have to deal with multi-command sequences.
+    ;;
+  '$CC '*)
+    # Test whether the compiler implicitly links with -lc since on some
+    # systems, -lgcc has to come before -lc. If gcc already passes -lc
+    # to ld, don't add -lc before -lgcc.
+    echo $ac_n "checking whether -lc should be explicitly linked in""... $ac_c" 1>&6
+echo "configure:5637: checking whether -lc should be explicitly linked in" >&5
+    if eval "test \"`echo '$''{'lt_cv_archive_cmds_need_lc'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  $rm conftest*
+    echo 'static int dummy;' > conftest.$ac_ext
+
+    if { (eval echo configure:5644: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+      soname=conftest
+      lib=conftest
+      libobjs=conftest.$ac_objext
+      deplibs=
+      wl=$lt_cv_prog_cc_wl
+      compiler_flags=-v
+      linker_flags=-v
+      verstring=
+      output_objdir=.
+      libname=conftest
+      save_allow_undefined_flag=$allow_undefined_flag
+      allow_undefined_flag=
+      if { (eval echo configure:5657: \"$archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1\") 1>&5; (eval $archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) 2>&5; }
+      then
+       lt_cv_archive_cmds_need_lc=no
+      else
+       lt_cv_archive_cmds_need_lc=yes
+      fi
+      allow_undefined_flag=$save_allow_undefined_flag
+    else
+      cat conftest.err 1>&5
+    fi
+    $rm conftest*
+fi
+
+    echo "$ac_t""$lt_cv_archive_cmds_need_lc" 1>&6
+    ;;
+  esac
+fi
+need_lc=${lt_cv_archive_cmds_need_lc-yes}
+
+# The second clause should only fire when bootstrapping the
+# libtool distribution, otherwise you forgot to ship ltmain.sh
+# with your package, and you will get complaints that there are
+# no rules to generate ltmain.sh.
+if test -f "$ltmain"; then
+  :
+else
+  # If there is no Makefile yet, we rely on a make rule to execute
+  # `config.status --recheck' to rerun these tests and create the
+  # libtool script then.
+  test -f Makefile && make "$ltmain"
+fi
+
+if test -f "$ltmain"; then
+  trap "$rm \"${ofile}T\"; exit 1" 1 2 15
+  $rm -f "${ofile}T"
+
+  echo creating $ofile
+
+  # For every variable VAR listed below, build a quoted copy named lt_VAR.
+  # The originals (including the AC_SUBSTed ones) are left untouched so they
+  # are not over-quoted; only the copies are used to generate the libtool script.
+  for var in echo old_CC old_CFLAGS SED \
+    AR AR_FLAGS CC LD LN_S NM SHELL \
+    reload_flag reload_cmds wl \
+    pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \
+    thread_safe_flag_spec whole_archive_flag_spec libname_spec \
+    library_names_spec soname_spec \
+    RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \
+    old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \
+    postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \
+    old_striplib striplib file_magic_cmd export_symbols_cmds \
+    deplibs_check_method allow_undefined_flag no_undefined_flag \
+    finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \
+    global_symbol_to_c_name_address \
+    hardcode_libdir_flag_spec hardcode_libdir_separator  \
+    sys_lib_search_path_spec sys_lib_dlsearch_path_spec \
+    compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do
+
+    case $var in
+    reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \
+    old_postinstall_cmds | old_postuninstall_cmds | \
+    export_symbols_cmds | archive_cmds | archive_expsym_cmds | \
+    extract_expsyms_cmds | old_archive_from_expsyms_cmds | \
+    postinstall_cmds | postuninstall_cmds | \
+    finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec)
+      # Double-quote double-evaled strings: besides $sed_quote_subst, these also go through $double_quote_subst and $delay_variable_subst.
+      eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\""
+      ;;
+    *)  # all other variables: a single pass through $sed_quote_subst, result wrapped in double quotes
+      eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\""
+      ;;
+    esac
+  done
+
+  cat <<__EOF__ > "${ofile}T"
+#! $SHELL
+
+# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# A sed that does not truncate output.
+SED=$lt_SED
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="${SED} -e s/^X//"
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+# ### BEGIN LIBTOOL CONFIG
+
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+
+# Shell to use when invoking shell scripts.
+SHELL=$lt_SHELL
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$need_lc
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# The host system.
+host_alias=$host_alias
+host=$host
+
+# An echo program that does not interpret backslashes.
+echo=$lt_echo
+
+# The archiver.
+AR=$lt_AR
+AR_FLAGS=$lt_AR_FLAGS
+
+# The default C compiler.
+CC=$lt_CC
+
+# Is the compiler the GNU C compiler?
+with_gcc=$GCC
+
+# The linker used to build libraries.
+LD=$lt_LD
+
+# Whether we need hard or soft links.
+LN_S=$lt_LN_S
+
+# A BSD-compatible nm program.
+NM=$lt_NM
+
+# A symbol stripping program
+STRIP=$STRIP
+
+# Used to examine libraries when file_magic_cmd begins "file"
+MAGIC_CMD=$MAGIC_CMD
+
+# Used on cygwin: DLL creation program.
+DLLTOOL="$DLLTOOL"
+
+# Used on cygwin: object dumper.
+OBJDUMP="$OBJDUMP"
+
+# Used on cygwin: assembler.
+AS="$AS"
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag
+reload_cmds=$lt_reload_cmds
+
+# How to pass a linker flag through the compiler.
+wl=$lt_wl
+
+# Object file suffix (normally "o").
+objext="$ac_objext"
+
+# Old archive suffix (normally "a").
+libext="$libext"
+
+# Executable file suffix (normally "").
+exeext="$exeext"
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_pic_flag
+pic_mode=$pic_mode
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_compiler_c_o
+
+# Can we write directly to a .lo ?
+compiler_o_lo=$lt_compiler_o_lo
+
+# Must we lock files when doing compilation ?
+need_locks=$lt_need_locks
+
+# Do we need the lib prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_link_static_flag
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_no_builtin_flag
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec
+
+# Compiler flag to generate thread-safe objects.
+thread_safe_flag_spec=$lt_thread_safe_flag_spec
+
+# Library versioning type.
+version_type=$version_type
+
+# Format of library name prefix.
+libname_spec=$lt_libname_spec
+
+# List of archive names.  First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME.
+library_names_spec=$lt_library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$lt_soname_spec
+
+# Commands used to build and install an old-style archive.
+RANLIB=$lt_RANLIB
+old_archive_cmds=$lt_old_archive_cmds
+old_postinstall_cmds=$lt_old_postinstall_cmds
+old_postuninstall_cmds=$lt_old_postuninstall_cmds
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds
+
+# Commands used to build and install a shared archive.
+archive_cmds=$lt_archive_cmds
+archive_expsym_cmds=$lt_archive_expsym_cmds
+postinstall_cmds=$lt_postinstall_cmds
+postuninstall_cmds=$lt_postuninstall_cmds
+
+# Commands to strip libraries.
+old_striplib=$lt_old_striplib
+striplib=$lt_striplib
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$lt_deplibs_check_method
+
+# Command to use when deplibs_check_method == file_magic.
+file_magic_cmd=$lt_file_magic_cmd
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag
+
+# Flag that forces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$lt_finish_cmds
+
+# Same as above, but a single script fragment to be evaled but not shown.
+finish_eval=$lt_finish_eval
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$lt_global_symbol_pipe
+
+# Transform the output of nm in a proper C declaration
+global_symbol_to_cdecl=$lt_global_symbol_to_cdecl
+
+# Transform the output of nm in a C name address pair
+global_symbol_to_c_name_address=$lt_global_symbol_to_c_name_address
+
+# This is the shared library runtime path variable.
+runpath_var=$runpath_var
+
+# This is the shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator
+
+# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct=$hardcode_direct
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L=$hardcode_minus_L
+
+# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
+# the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var
+
+# Variables whose values should be saved in libtool wrapper scripts and
+# restored at relink time.
+variables_saved_for_relink="$variables_saved_for_relink"
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs
+
+# Compile-time system search path for libraries
+sys_lib_search_path_spec=$lt_sys_lib_search_path_spec
+
+# Run-time system search path for libraries
+sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
+
+# Fix the shell variable \$srcfile for the compiler.
+fix_srcfile_path="$fix_srcfile_path"
+
+# Set to yes if exported symbols are required.
+always_export_symbols=$always_export_symbols
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$lt_extract_expsyms_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms
+
+# ### END LIBTOOL CONFIG
+
+__EOF__
+
+  case $host_os in
+  aix3*)
+    cat <<\EOF >> "${ofile}T"
+
+# AIX sometimes has problems with the GCC collect2 program.  For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+  COLLECT_NAMES=
+  export COLLECT_NAMES
+fi
+EOF
+    ;;
+  esac
+
+  case $host_os in
+  cygwin* | mingw* | pw32* | os2*)
+    cat <<'EOF' >> "${ofile}T"
+      # This is a source program that is used to create dlls on Windows
+      # Don't remove nor modify the starting and closing comments
+# /* ltdll.c starts here */
+# #define WIN32_LEAN_AND_MEAN
+# #include <windows.h>
+# #undef WIN32_LEAN_AND_MEAN
+# #include <stdio.h>
+#
+# #ifndef __CYGWIN__
+# #  ifdef __CYGWIN32__
+# #    define __CYGWIN__ __CYGWIN32__
+# #  endif
+# #endif
+#
+# #ifdef __cplusplus
+# extern "C" {
+# #endif
+# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved);
+# #ifdef __cplusplus
+# }
+# #endif
+#
+# #ifdef __CYGWIN__
+# #include <cygwin/cygwin_dll.h>
+# DECLARE_CYGWIN_DLL( DllMain );
+# #endif
+# HINSTANCE __hDllInstance_base;
+#
+# BOOL APIENTRY
+# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved)
+# {
+#   __hDllInstance_base = hInst;
+#   return TRUE;
+# }
+# /* ltdll.c ends here */
+       # This is a source program that is used to create import libraries
+       # on Windows for dlls which lack them. Don't remove nor modify the
+       # starting and closing comments
+# /* impgen.c starts here */
+# /*   Copyright (C) 1999-2000 Free Software Foundation, Inc.
+#
+#  This file is part of GNU libtool.
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#  */
+#
+# #include <stdio.h>           /* for printf() */
+# #include <unistd.h>          /* for open(), lseek(), read() */
+# #include <fcntl.h>           /* for O_RDONLY, O_BINARY */
+# #include <string.h>          /* for strdup() */
+#
+# /* O_BINARY isn't required (or even defined sometimes) under Unix */
+# #ifndef O_BINARY
+# #define O_BINARY 0
+# #endif
+#
+# static unsigned int
+# pe_get16 (fd, offset)
+#      int fd;
+#      int offset;
+# {
+#   unsigned char b[2];
+#   lseek (fd, offset, SEEK_SET);
+#   read (fd, b, 2);
+#   return b[0] + (b[1]<<8);
+# }
+#
+# static unsigned int
+# pe_get32 (fd, offset)
+#     int fd;
+#     int offset;
+# {
+#   unsigned char b[4];
+#   lseek (fd, offset, SEEK_SET);
+#   read (fd, b, 4);
+#   return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# static unsigned int
+# pe_as32 (ptr)
+#      void *ptr;
+# {
+#   unsigned char *b = ptr;
+#   return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# int
+# main (argc, argv)
+#     int argc;
+#     char *argv[];
+# {
+#     int dll;
+#     unsigned long pe_header_offset, opthdr_ofs, num_entries, i;
+#     unsigned long export_rva, export_size, nsections, secptr, expptr;
+#     unsigned long name_rvas, nexp;
+#     unsigned char *expdata, *erva;
+#     char *filename, *dll_name;
+#
+#     filename = argv[1];
+#
+#     dll = open(filename, O_RDONLY|O_BINARY);
+#     if (dll < 1)
+#      return 1;
+#
+#     dll_name = filename;
+#
+#     for (i=0; filename[i]; i++)
+#      if (filename[i] == '/' || filename[i] == '\\'  || filename[i] == ':')
+#          dll_name = filename + i +1;
+#
+#     pe_header_offset = pe_get32 (dll, 0x3c);
+#     opthdr_ofs = pe_header_offset + 4 + 20;
+#     num_entries = pe_get32 (dll, opthdr_ofs + 92);
+#
+#     if (num_entries < 1) /* no exports */
+#      return 1;
+#
+#     export_rva = pe_get32 (dll, opthdr_ofs + 96);
+#     export_size = pe_get32 (dll, opthdr_ofs + 100);
+#     nsections = pe_get16 (dll, pe_header_offset + 4 +2);
+#     secptr = (pe_header_offset + 4 + 20 +
+#            pe_get16 (dll, pe_header_offset + 4 + 16));
+#
+#     expptr = 0;
+#     for (i = 0; i < nsections; i++)
+#     {
+#      char sname[8];
+#      unsigned long secptr1 = secptr + 40 * i;
+#      unsigned long vaddr = pe_get32 (dll, secptr1 + 12);
+#      unsigned long vsize = pe_get32 (dll, secptr1 + 16);
+#      unsigned long fptr = pe_get32 (dll, secptr1 + 20);
+#      lseek(dll, secptr1, SEEK_SET);
+#      read(dll, sname, 8);
+#      if (vaddr <= export_rva && vaddr+vsize > export_rva)
+#      {
+#          expptr = fptr + (export_rva - vaddr);
+#          if (export_rva + export_size > vaddr + vsize)
+#              export_size = vsize - (export_rva - vaddr);
+#          break;
+#      }
+#     }
+#
+#     expdata = (unsigned char*)malloc(export_size);
+#     lseek (dll, expptr, SEEK_SET);
+#     read (dll, expdata, export_size);
+#     erva = expdata - export_rva;
+#
+#     nexp = pe_as32 (expdata+24);
+#     name_rvas = pe_as32 (expdata+32);
+#
+#     printf ("EXPORTS\n");
+#     for (i = 0; i<nexp; i++)
+#     {
+#      unsigned long name_rva = pe_as32 (erva+name_rvas+i*4);
+#      printf ("\t%s @ %ld ;\n", erva+name_rva, 1+ i);
+#     }
+#
+#     return 0;
+# }
+# /* impgen.c ends here */
+
+EOF
+    ;;
+  esac
+
+  # We use sed instead of cat because bash on DJGPP gets confused if
+  # it finds mixed CR/LF and LF-only lines.  Since sed operates in
+  # text mode, it properly converts lines to CR/LF.  This bash problem
+  # is reportedly fixed, but why not run on old versions too?
+  sed '$q' "$ltmain" >> "${ofile}T" || (rm -f "${ofile}T"; exit 1)
+
+  mv -f "${ofile}T" "$ofile" || \
+    (rm -f "$ofile" && cp "${ofile}T" "$ofile" && rm -f "${ofile}T")
+  chmod +x "$ofile"
+fi
+
+
+
+
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+# Prevent multiple expansion
+
+
+fi
+# if libtool >= 1.5
+TAGCC=
+
+
+# Select memory manager depending on user input.
+# If no "-enable-maxmem", use jmemnobs
+MEMORYMGR='jmemnobs.$(O)'
+MAXMEM="no"
+# Check whether --enable-maxmem or --disable-maxmem was given.
+if test "${enable_maxmem+set}" = set; then
+  enableval="$enable_maxmem"
+  MAXMEM="$enableval"
+fi
+
+# support --with-maxmem for backwards compatibility with IJG V5.
+# Check whether --with-maxmem or --without-maxmem was given.
+if test "${with_maxmem+set}" = set; then
+  withval="$with_maxmem"
+  MAXMEM="$withval"
+fi
+
+if test "x$MAXMEM" = xyes; then
+  MAXMEM=1
+fi
+if test "x$MAXMEM" != xno; then
+  if test -n "`echo $MAXMEM | sed 's/[0-9]//g'`"; then
+    { echo "configure: error: non-numeric argument to --enable-maxmem" 1>&2; exit 1; }
+  fi
+  DEFAULTMAXMEM=`expr $MAXMEM \* 1048576`
+cat >> confdefs.h <<EOF
+#define DEFAULT_MAX_MEM ${DEFAULTMAXMEM}
+EOF
+
+echo $ac_n "checking for 'tmpfile()'""... $ac_c" 1>&6
+echo "configure:6277: checking for 'tmpfile()'" >&5
+cat > conftest.$ac_ext <<EOF
+#line 6279 "configure"
+#include "confdefs.h"
+#include <stdio.h>
+int main() {
+ FILE * tfile = tmpfile(); 
+; return 0; }
+EOF
+if { (eval echo configure:6286: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  echo "$ac_t""yes" 1>&6
+MEMORYMGR='jmemansi.$(O)'
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.$ac_ext >&5
+  rm -rf conftest*
+  echo "$ac_t""no" 1>&6
+MEMORYMGR='jmemname.$(O)'
+cat >> confdefs.h <<\EOF
+#define NEED_SIGNAL_CATCHER 
+EOF
+
+echo $ac_n "checking for 'mktemp()'""... $ac_c" 1>&6
+echo "configure:6301: checking for 'mktemp()'" >&5
+cat > conftest.$ac_ext <<EOF
+#line 6303 "configure"
 #include "confdefs.h"
 
 int main() {
  char fname[80]; mktemp(fname); 
 ; return 0; }
 EOF
-if { (eval echo configure:1629: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
+if { (eval echo configure:6310: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
   rm -rf conftest*
   echo "$ac_t""yes" 1>&6
 else
@@ -1644,11 +6325,354 @@ rm -f conftest*
 fi
 
 
-# Extract the library version ID from jpeglib.h.
-echo $ac_n "checking libjpeg version number""... $ac_c" 1>&6
-echo "configure:1650: checking libjpeg version number" >&5
-JPEG_LIB_VERSION=`sed -e '/^#define JPEG_LIB_VERSION/!d' -e 's/^[^0-9]*\([0-9][0-9]*\).*$/\1/' $srcdir/jpeglib.h`
-echo "$ac_t""$JPEG_LIB_VERSION" 1>&6
+
+echo $ac_n "checking to see if the host cpu type is i386 or compatible""... $ac_c" 1>&6
+echo "configure:6331: checking to see if the host cpu type is i386 or compatible" >&5
+case "$host_cpu" in
+  i*86 | x86 | ia32)
+    echo "$ac_t""yes" 1>&6
+  ;;
+  x86_64 | amd64 | aa64)
+    echo "$ac_t""no (x86_64)" 1>&6
+    { echo "configure: error: Currently, this version of JPEG library cannot be compiled as 64-bit code. sorry." 1>&2; exit 1; }
+  ;;
+  *)
+    echo "$ac_t""no ("$host_cpu")" 1>&6
+    { echo "configure: error: This version of JPEG library is for i386 or compatible processors only." 1>&2; exit 1; }
+  ;;
+esac
+
+if test -z "$NAFLAGS" ; then
+  echo $ac_n "checking for object file format of host system""... $ac_c" 1>&6
+echo "configure:6348: checking for object file format of host system" >&5
+  case "$host_os" in
+    cygwin* | mingw* | pw32* | interix*)
+      objfmt='Win32-COFF'
+    ;;
+    msdosdjgpp* | go32*)
+      objfmt='COFF'
+    ;;
+    os2-emx*)                  # not tested
+      objfmt='MSOMF'           # obj
+    ;;
+    linux*coff* | linux*oldld*)
+      objfmt='COFF'            # ???
+    ;;
+    linux*aout*)
+      objfmt='a.out'
+    ;;
+    linux*)
+      objfmt='ELF'
+    ;;
+    freebsd* | netbsd* | openbsd*)
+      if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+        objfmt='BSD-a.out'
+      else
+        objfmt='ELF'
+      fi
+    ;;
+    solaris* | sunos* | sysv* | sco*)
+      objfmt='ELF'
+    ;;
+    darwin* | rhapsody* | nextstep* | openstep* | macos*)
+      objfmt='Mach-O'
+    ;;
+    *)
+      objfmt='ELF ?'
+    ;;
+  esac
+  echo "$ac_t""$objfmt" 1>&6
+  if test "$objfmt" = 'ELF ?'; then
+    objfmt='ELF'
+    echo "configure: warning: unexpected host system. assumed that the format is $objfmt." 1>&2
+  fi
+else
+  objfmt=''
+fi
+echo $ac_n "checking for object file format specifier (NAFLAGS) ""... $ac_c" 1>&6
+echo "configure:6394: checking for object file format specifier (NAFLAGS) " >&5
+case "$objfmt" in
+  MSOMF)      NAFLAGS='-fobj -DOBJ32';;
+  Win32-COFF) NAFLAGS='-fwin32 -DWIN32';;
+  COFF)       NAFLAGS='-fcoff -DCOFF';;
+  a.out)      NAFLAGS='-faout -DAOUT';;
+  BSD-a.out)  NAFLAGS='-faoutb -DAOUT';;
+  ELF)        NAFLAGS='-felf -DELF';;
+  RDF)        NAFLAGS='-frdf -DRDF';;
+  Mach-O)     NAFLAGS='-fmacho -DMACHO';;
+esac
+echo "$ac_t""$NAFLAGS" 1>&6
+
+
+
+for ac_prog in nasm nasmw
+do
+# Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+echo "configure:6414: checking for $ac_word" >&5
+if eval "test \"`echo '$''{'ac_cv_prog_NASM'+set}'`\" = set"; then
+  echo $ac_n "(cached) $ac_c" 1>&6
+else
+  if test -n "$NASM"; then
+  ac_cv_prog_NASM="$NASM" # Let the user override the test.
+else
+  IFS="${IFS=  }"; ac_save_ifs="$IFS"; IFS=":"
+  ac_dummy="$PATH"
+  for ac_dir in $ac_dummy; do
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$ac_word; then
+      ac_cv_prog_NASM="$ac_prog"
+      break
+    fi
+  done
+  IFS="$ac_save_ifs"
+fi
+fi
+NASM="$ac_cv_prog_NASM"
+if test -n "$NASM"; then
+  echo "$ac_t""$NASM" 1>&6
+else
+  echo "$ac_t""no" 1>&6
+fi
+
+test -n "$NASM" && break
+done
+
+test -z "$NASM" && { echo "configure: error: no nasm (Netwide Assembler) found in \$PATH" 1>&2; exit 1; }
+if echo "$NASM" | grep yasm > /dev/null; then
+  echo "configure: warning: DON'T USE YASM! CURRENT VERSION (R0.4.0) IS BUGGY!" 1>&2
+fi
+
+echo $ac_n "checking whether the assembler ($NASM $NAFLAGS) works""... $ac_c" 1>&6
+echo "configure:6449: checking whether the assembler ($NASM $NAFLAGS) works" >&5
+cat > conftest.asm <<EOF
+%line 6451 "configure"
+        section .text
+        bits    32
+        global  _main,main
+_main:
+main:   xor     eax,eax
+        ret
+EOF
+try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm'
+if { (eval echo configure:6460: \"$try_nasm\") 1>&5; (eval $try_nasm) 2>&5; } && test -s conftest.o; then
+  echo "$ac_t""yes" 1>&6
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.asm >&5
+  rm -rf conftest*
+  echo "$ac_t""no" 1>&6
+  { echo "configure: error: installation or configuration problem: assembler cannot create object files." 1>&2; exit 1; }
+fi
+echo $ac_n "checking whether the linker accepts assembler output""... $ac_c" 1>&6
+echo "configure:6470: checking whether the linker accepts assembler output" >&5
+try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&5'
+if { (eval echo configure:6472: \"$try_nasm\") 1>&5; (eval $try_nasm) 2>&5; } && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  echo "$ac_t""yes" 1>&6
+else
+  rm -rf conftest*
+  echo "$ac_t""no" 1>&6
+  { echo "configure: error: configuration problem: maybe object file format mismatch." 1>&2; exit 1; }
+fi
+
+echo $ac_n "checking whether the assembler supports line continuation character""... $ac_c" 1>&6
+echo "configure:6482: checking whether the assembler supports line continuation character" >&5
+cat > conftest.asm <<\EOF
+%line 6484 "configure"
+; The line continuation character '\'
+; was introduced in nasm 0.98.25.
+        section .text
+        bits    32
+        global  _zero
+_zero:  xor     \
+                eax,eax
+        ret
+EOF
+try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm'
+if { (eval echo configure:6495: \"$try_nasm\") 1>&5; (eval $try_nasm) 2>&5; } && test -s conftest.o; then
+  rm -rf conftest*
+  echo "$ac_t""yes" 1>&6
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.asm >&5
+  rm -rf conftest*
+  echo "$ac_t""no" 1>&6
+  { echo "configure: error: you have to use a more recent version of the assembler." 1>&2; exit 1; }
+fi
+
+
+echo $ac_n "checking SIMD instruction sets requested to use""... $ac_c" 1>&6
+echo "configure:6508: checking SIMD instruction sets requested to use" >&5
+simd_to_use=""
+
+# Check whether --enable-mmx or --disable-mmx was given.
+if test "${enable_mmx+set}" = set; then
+  enableval="$enable_mmx"
+  if test "x$enableval" = xno; then
+  cat >> confdefs.h <<\EOF
+#define JSIMD_MMX_NOT_SUPPORTED 
+EOF
+
+else
+  simd_to_use="$simd_to_use MMX"
+fi
+else
+  simd_to_use="$simd_to_use MMX"
+fi
+
+
+# Check whether --enable-3dnow or --disable-3dnow was given.
+if test "${enable_3dnow+set}" = set; then
+  enableval="$enable_3dnow"
+  if test "x$enableval" = xno; then
+  cat >> confdefs.h <<\EOF
+#define JSIMD_3DNOW_NOT_SUPPORTED 
+EOF
+
+else
+  simd_to_use="$simd_to_use 3DNow!"
+fi
+else
+  simd_to_use="$simd_to_use 3DNow!"
+fi
+
+
+# Check whether --enable-sse or --disable-sse was given.
+if test "${enable_sse+set}" = set; then
+  enableval="$enable_sse"
+  if test "x$enableval" = xno; then
+  cat >> confdefs.h <<\EOF
+#define JSIMD_SSE_NOT_SUPPORTED 
+EOF
+
+else
+  simd_to_use="$simd_to_use SSE"
+fi
+else
+  simd_to_use="$simd_to_use SSE"
+fi
+
+
+# Check whether --enable-sse2 or --disable-sse2 was given.
+if test "${enable_sse2+set}" = set; then
+  enableval="$enable_sse2"
+  if test "x$enableval" = xno; then
+  cat >> confdefs.h <<\EOF
+#define JSIMD_SSE2_NOT_SUPPORTED 
+EOF
+
+else
+  simd_to_use="$simd_to_use SSE2"
+fi
+else
+  simd_to_use="$simd_to_use SSE2"
+fi
+
+
+test -z "$simd_to_use" && simd_to_use="NONE"
+echo "$ac_t""$simd_to_use" 1>&6
+
+for simd_name in $simd_to_use; do
+case "$simd_name" in
+  MMX)    simd_instruction='psubw mm0,mm0';;
+  3DNow!) simd_instruction='pfsub mm0,mm0';;
+  SSE)    simd_instruction='subps xmm0,xmm0';;
+  SSE2)   simd_instruction='subpd xmm0,xmm0';;
+  *)      continue;;
+esac
+echo $ac_n "checking whether the assembler supports $simd_name instructions""... $ac_c" 1>&6
+echo "configure:6587: checking whether the assembler supports $simd_name instructions" >&5
+cat > conftest.asm <<EOF
+%line 6589 "configure"
+        section .text
+        bits    32
+        global  _simd
+_simd:  $simd_instruction
+        ret
+EOF
+try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm'
+if { (eval echo configure:6597: \"$try_nasm\") 1>&5; (eval $try_nasm) 2>&5; } && test -s conftest.o; then
+  rm -rf conftest*
+  echo "$ac_t""yes" 1>&6
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.asm >&5
+  rm -rf conftest*
+  echo "$ac_t""no" 1>&6
+  { echo "configure: error: you have to use a more recent version of the assembler." 1>&2; exit 1; }
+fi
+done
+
+# Select OS-dependent SIMD instruction support checker.
+# jsimdw32.$(O) (Win32) / jsimddjg.$(O) (DJGPP V.2) / jsimdgcc.$(O) (Unix/gcc)
+if test "x$SIMDCHECKER" = x ; then
+  case "$host_os" in
+    cygwin* | mingw* | pw32* | interix*)
+      SIMDCHECKER='jsimdw32.$(O)'
+    ;;
+    msdosdjgpp* | go32*)
+      SIMDCHECKER='jsimddjg.$(O)'
+    ;;
+    os2-emx*)                  # not tested
+      SIMDCHECKER='jsimdgcc.$(O)'
+    ;;
+    *)
+      SIMDCHECKER='jsimdgcc.$(O)'
+    ;;
+  esac
+fi
+
+
+case "$host_os" in
+  cygwin* | mingw* | pw32* | os2-emx* | msdosdjgpp* | go32*)
+    cat >> confdefs.h <<\EOF
+#define USE_SETMODE 
+EOF
+
+  ;;
+# _host_name_*)
+#   AC_DEFINE([USE_FDOPEN],)
+# ;;
+esac
+
+# This is for UNIX-like environments on Windows platform.
+# Check whether --enable-uchar-boolean or --disable-uchar-boolean was given.
+if test "${enable_uchar_boolean+set}" = set; then
+  enableval="$enable_uchar_boolean"
+  if test "x$enableval" != xno; then
+  cat >> confdefs.h <<\EOF
+#define TYPEDEF_UCHAR_BOOLEAN 
+EOF
+
+fi
+fi
+
+
+
+JPEG_LIB_VERSION="63:0:1"
+confv_dirs="$srcdir $srcdir/.. $srcdir/../.."
+config_ver=
+for ac_dir in $confv_dirs; do
+  if test -r $ac_dir/config.ver; then
+    config_ver=$ac_dir/config.ver
+    break
+  fi
+done
+if test -z "$config_ver"; then
+  echo "configure: warning: cannot find config.ver in $confv_dirs" 1>&2
+  echo "configure: warning: default version number $JPEG_LIB_VERSION is used" 1>&2
+  echo $ac_n "checking libjpeg version number for libtool""... $ac_c" 1>&6
+echo "configure:6668: checking libjpeg version number for libtool" >&5
+  echo "$ac_t""$JPEG_LIB_VERSION" 1>&6
+else
+  echo $ac_n "checking libjpeg version number for libtool""... $ac_c" 1>&6
+echo "configure:6672: checking libjpeg version number for libtool" >&5
+  . $config_ver
+  echo "$ac_t""$JPEG_LIB_VERSION" 1>&6
+  echo "configure: if you want to change the version number, modify $config_ver" 1>&2
+fi
 
 
 # Prepare to massage makefile.cfg correctly.
@@ -1675,12 +6699,15 @@ else
   COM_LT="# "
 fi
 
-if test "x$LTSHARED" != xno; then
+if test "x$enable_shared" != xno; then
   FORCE_INSTALL_LIB="install-lib"
+  UNINSTALL_LIB="uninstall-lib"
 else
   FORCE_INSTALL_LIB=""
+  UNINSTALL_LIB=""
 fi
 
+
 # Set up -I directives
 if test "x$srcdir" = x.; then
   INCLUDEFLAGS='-I$(srcdir)'
@@ -1689,6 +6716,52 @@ else
 fi
 
 trap '' 1 2 15
+cat > confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs.  It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already.  You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+  case `(ac_space=' '; set | grep ac_space) 2>&1` in
+  *ac_space=\ *)
+    # `set' does not quote correctly, so add quotes (double-quote substitution
+    # turns \\\\ into \\, and sed turns \\ into \).
+    sed -n \
+      -e "s/'/'\\\\''/g" \
+      -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p"
+    ;;
+  *)
+    # `set' quotes correctly as required by POSIX, so do not add quotes.
+    sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p'
+    ;;
+  esac >> confcache
+if cmp -s $cache_file confcache; then
+  :
+else
+  if test -w $cache_file; then
+    echo "updating cache $cache_file"
+    cat confcache > $cache_file
+  else
+    echo "not updating unwritable cache $cache_file"
+  fi
+fi
+rm -f confcache
 
 trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
 
@@ -1732,7 +6805,7 @@ do
     echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion"
     exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;;
   -version | --version | --versio | --versi | --vers | --ver | --ve | --v)
-    echo "$CONFIG_STATUS generated by autoconf version 2.12"
+    echo "$CONFIG_STATUS generated by autoconf version 2.13"
     exit 0 ;;
   -help | --help | --hel | --he | --h)
     echo "\$ac_cs_usage"; exit 0 ;;
@@ -1752,9 +6825,11 @@ sed 's/%@/@@/; s/@%/@@/; s/%g\$/@g/; /@g\$/s/[\\\\&%]/\\\\&/g;
  s/@@/%@/; s/@@/@%/; s/@g\$/%g/' > conftest.subs <<\\CEOF
 $ac_vpsub
 $extrasub
+s%@SHELL@%$SHELL%g
 s%@CFLAGS@%$CFLAGS%g
 s%@CPPFLAGS@%$CPPFLAGS%g
 s%@CXXFLAGS@%$CXXFLAGS%g
+s%@FFLAGS@%$FFLAGS%g
 s%@DEFS@%$DEFS%g
 s%@LDFLAGS@%$LDFLAGS%g
 s%@LIBS@%$LIBS%g
@@ -1776,20 +6851,45 @@ s%@mandir@%$mandir%g
 s%@CC@%$CC%g
 s%@CPP@%$CPP%g
 s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g
+s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g
 s%@INSTALL_DATA@%$INSTALL_DATA%g
 s%@RANLIB@%$RANLIB%g
+s%@host@%$host%g
+s%@host_alias@%$host_alias%g
+s%@host_cpu@%$host_cpu%g
+s%@host_vendor@%$host_vendor%g
+s%@host_os@%$host_os%g
+s%@EXEEXT@%$EXEEXT%g
 s%@LIBTOOL@%$LIBTOOL%g
 s%@O@%$O%g
 s%@A@%$A%g
 s%@LN@%$LN%g
 s%@INSTALL_LIB@%$INSTALL_LIB%g
+s%@UNINSTALL@%$UNINSTALL%g
+s%@build@%$build%g
+s%@build_alias@%$build_alias%g
+s%@build_cpu@%$build_cpu%g
+s%@build_vendor@%$build_vendor%g
+s%@build_os@%$build_os%g
+s%@LN_S@%$LN_S%g
+s%@OBJEXT@%$OBJEXT%g
+s%@ECHO@%$ECHO%g
+s%@STRIP@%$STRIP%g
+s%@DLLTOOL@%$DLLTOOL%g
+s%@AS@%$AS%g
+s%@OBJDUMP@%$OBJDUMP%g
+s%@TAGCC@%$TAGCC%g
 s%@MEMORYMGR@%$MEMORYMGR%g
+s%@NAFLAGS@%$NAFLAGS%g
+s%@NASM@%$NASM%g
+s%@SIMDCHECKER@%$SIMDCHECKER%g
 s%@JPEG_LIB_VERSION@%$JPEG_LIB_VERSION%g
 s%@A2K_DEPS@%$A2K_DEPS%g
 s%@COM_A2K@%$COM_A2K%g
 s%@ANSI2KNRFLAGS@%$ANSI2KNRFLAGS%g
 s%@COM_LT@%$COM_LT%g
 s%@FORCE_INSTALL_LIB@%$FORCE_INSTALL_LIB%g
+s%@UNINSTALL_LIB@%$UNINSTALL_LIB%g
 s%@INCLUDEFLAGS@%$INCLUDEFLAGS%g
 
 CEOF
@@ -1952,6 +7052,7 @@ rm -f conftest.hdr
 # example, in the case of _POSIX_SOURCE, which is predefined and required
 # on some systems where configure will not decide to define it.
 cat >> conftest.vals <<\EOF
+s%^[   ]*#[    ]*undef[        ][      ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */%
 EOF
 
 # Break up conftest.vals because some shells have a limit on
diff --git a/configure.in b/configure.in
new file mode 100644 (file)
index 0000000..06171f0
--- /dev/null
@@ -0,0 +1,634 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_INIT([jcmaster.c])
+AC_CONFIG_HEADER([jconfig.h:jconfig.cfg])
+dnl --------------------------------------------------------------------
+AC_PROG_CC
+AC_PROG_CPP
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for function prototypes])
+AC_CACHE_VAL([ijg_cv_have_prototypes],[AC_TRY_COMPILE([
+int testfunction (int arg1, int * arg2); /* check prototypes */
+struct methods_struct {                /* check method-pointer declarations */
+  int (*error_exit) (char *msgtext);
+  int (*trace_message) (char *msgtext);
+  int (*another_method) (void);
+};
+int testfunction (int arg1, int * arg2) /* check definitions */
+{ return arg2[arg1]; }
+int test2function (void)       /* check void arg list */
+{ return 0; }
+],[ ],[ijg_cv_have_prototypes=yes],[ijg_cv_have_prototypes=no])])
+AC_MSG_RESULT([$ijg_cv_have_prototypes])
+if test $ijg_cv_have_prototypes = yes; then
+  AC_DEFINE([HAVE_PROTOTYPES],)
+else
+  echo [Your compiler does not seem to know about function prototypes.]
+  echo [Perhaps it needs a special switch to enable ANSI C mode.]
+  echo [If so, we recommend running configure like this:]
+  echo ["   ./configure  CC='cc -switch'"]
+  echo [where -switch is the proper switch.]
+fi
+dnl --------------------------------------------------------------------
+AC_CHECK_HEADER([stddef.h],[AC_DEFINE([HAVE_STDDEF_H],)])
+AC_CHECK_HEADER([stdlib.h],[AC_DEFINE([HAVE_STDLIB_H],)])
+AC_CHECK_HEADER([string.h],[:],[AC_DEFINE([NEED_BSD_STRINGS],)])
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for size_t])
+AC_TRY_COMPILE([
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <stdio.h>
+#ifdef NEED_BSD_STRINGS
+#include <strings.h>
+#else
+#include <string.h>
+#endif
+typedef size_t my_size_t;
+],[ my_size_t foovar; ],
+[ijg_size_t_ok=yes],
+[ijg_size_t_ok="not ANSI, perhaps it is in sys/types.h"])
+AC_MSG_RESULT([$ijg_size_t_ok])
+if test "$ijg_size_t_ok" != yes; then
+AC_CHECK_HEADER([sys/types.h],[AC_DEFINE([NEED_SYS_TYPES_H],)
+AC_EGREP_HEADER([size_t],[sys/types.h],
+[ijg_size_t_ok="size_t is in sys/types.h"],[ijg_size_t_ok=no])],
+[ijg_size_t_ok=no])
+AC_MSG_RESULT([$ijg_size_t_ok])
+if test "$ijg_size_t_ok" = no; then
+  echo [Type size_t is not defined in any of the usual places.]
+  echo [Try putting '"typedef unsigned int size_t;"' in jconfig.h.]
+fi
+fi
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for type unsigned char])
+AC_TRY_COMPILE(,[ unsigned char un_char; ],[AC_MSG_RESULT(yes)
+AC_DEFINE([HAVE_UNSIGNED_CHAR],)],[AC_MSG_RESULT(no)])
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for type unsigned short])
+AC_TRY_COMPILE(,[ unsigned short un_short; ],[AC_MSG_RESULT(yes)
+AC_DEFINE([HAVE_UNSIGNED_SHORT],)],[AC_MSG_RESULT(no)])
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for type void])
+AC_TRY_COMPILE([
+/* Caution: a C++ compiler will insist on valid prototypes */
+typedef void * void_ptr;       /* check void * */
+#ifdef HAVE_PROTOTYPES         /* check ptr to function returning void */
+typedef void (*void_func) (int a, int b);
+#else
+typedef void (*void_func) ();
+#endif
+
+#ifdef HAVE_PROTOTYPES         /* check void function result */
+void test3function (void_ptr arg1, void_func arg2)
+#else
+void test3function (arg1, arg2)
+     void_ptr arg1;
+     void_func arg2;
+#endif
+{
+  char * locptr = (char *) arg1; /* check casting to and from void * */
+  arg1 = (void *) locptr;
+  (*arg2) (1, 2);              /* check call of fcn returning void */
+}
+],[ ],[AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)
+AC_DEFINE([void],[char])])
+
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for working const])
+AC_CACHE_VAL([ac_cv_c_const],[AC_TRY_COMPILE(,[
+/* Ultrix mips cc rejects this.  */
+typedef int charset[2]; const charset x;
+/* SunOS 4.1.1 cc rejects this.  */
+char const *const *ccp;
+char **p;
+/* NEC SVR4.0.2 mips cc rejects this.  */
+struct point {int x, y;};
+static struct point const zero = {0,0};
+/* AIX XL C 1.02.0.0 rejects this.
+   It does not let you subtract one const X* pointer from another in an arm
+   of an if-expression whose if-part is not a constant expression */
+const char *g = "string";
+ccp = &g + (g ? g-g : 0);
+/* HPUX 7.0 cc rejects these. */
+++ccp;
+p = (char**) ccp;
+ccp = (char const *const *) p;
+{ /* SCO 3.2v4 cc rejects this.  */
+  char *t;
+  char const *s = 0 ? (char *) 0 : (char const *) 0;
+
+  *t++ = 0;
+}
+{ /* Someone thinks the Sun supposedly-ANSI compiler will reject this.  */
+  int x[] = {25, 17};
+  const int *foo = &x[0];
+  ++foo;
+}
+{ /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
+  typedef const int *iptr;
+  iptr p = 0;
+  ++p;
+}
+{ /* AIX XL C 1.02.0.0 rejects this saying
+     "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
+  struct s { int j; const int *ap[3]; };
+  struct s *b; b->j = 5;
+}
+{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
+  const int foo = 10;
+}
+],[ac_cv_c_const=yes],[ac_cv_c_const=no])])
+AC_MSG_RESULT([$ac_cv_c_const])
+if test $ac_cv_c_const = no; then
+  AC_DEFINE([const],)
+fi
+
+dnl --------------------------------------------------------------------
+dnl Probe for an inline keyword, trying __inline__, __inline and inline in
+dnl that order; the first spelling that compiles is kept, and INLINE is
+dnl defined to nothing when none of them does.
+dnl NOTE(review): each test body opens with a closing brace and omits the
+dnl final one, presumably to end the function that AC_TRY_COMPILE wraps
+dnl around the body so that foo and bar land at file scope -- confirm
+dnl against the generated configure script.
+AC_MSG_CHECKING([for inline])
+ijg_cv_inline=""
+AC_TRY_COMPILE(,[} __inline__ int foo() { return 0; }
+int bar() { return foo();],[ijg_cv_inline="__inline__"],
+[AC_TRY_COMPILE(,[} __inline int foo() { return 0; }
+int bar() { return foo();],[ijg_cv_inline="__inline"],
+[AC_TRY_COMPILE(,[} inline int foo() { return 0; }
+int bar() { return foo();],[ijg_cv_inline="inline"],)])])
+AC_MSG_RESULT([$ijg_cv_inline])
+AC_DEFINE_UNQUOTED([INLINE],[$ijg_cv_inline])
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for broken incomplete types])
+AC_TRY_COMPILE([ typedef struct undefined_structure * undef_struct_ptr; ],
+,[AC_MSG_RESULT(ok)],[AC_MSG_RESULT(broken)
+AC_DEFINE([INCOMPLETE_TYPES_BROKEN],)])
+dnl --------------------------------------------------------------------
+AC_MSG_CHECKING([for short external names])
+AC_TRY_LINK([
+int possibly_duplicate_function () { return 0; }
+int possibly_dupli_function () { return 1; }
+],[ ],[AC_MSG_RESULT(ok)],[AC_MSG_RESULT(short)
+AC_DEFINE([NEED_SHORT_EXTERNAL_NAMES],)])
+dnl --------------------------------------------------------------------
+dnl Run-time probe for the signedness of plain char.  The program exits
+dnl with status 0 when char is unsigned (CHAR_IS_UNSIGNED gets defined)
+dnl and 1 when it is signed; when cross-compiling, signed is assumed.
+dnl The program pulls in stdio.h and returns its status from an explicitly
+dnl typed main, so compilers that reject implicit function declarations
+dnl or implicit int still build it.  A compile failure would count as a
+dnl failed run and report char as signed without testing anything.
+AC_MSG_CHECKING([to see if char is signed])
+AC_TRY_RUN([
+#include <stdio.h>
+#ifdef HAVE_PROTOTYPES
+int is_char_signed (int arg)
+#else
+int is_char_signed (arg)
+     int arg;
+#endif
+{
+  if (arg == 189) {            /* expected result for unsigned char */
+    return 0;                  /* type char is unsigned */
+  }
+  else if (arg != -67) {       /* expected result for signed char */
+    printf("Hmm, it seems 'char' is not eight bits wide on your machine.\n");
+    printf("I fear the JPEG software will not work at all.\n\n");
+  }
+  return 1;                    /* assume char is signed otherwise */
+}
+char signed_char_check = (char) (-67);
+int main() {
+  return is_char_signed((int) signed_char_check);
+}],[AC_MSG_RESULT(no)
+AC_DEFINE([CHAR_IS_UNSIGNED],)],[AC_MSG_RESULT(yes)],
+[echo Assuming that char is signed on target machine.
+echo If it is unsigned, this will be a little bit inefficient.
+])
+dnl --------------------------------------------------------------------
+dnl Run-time probe for arithmetic versus logical right shift of a negative
+dnl long.  The program exits with status 1 when the shift is signed and 0
+dnl otherwise, in which case RIGHT_SHIFT_IS_UNSIGNED gets defined; when
+dnl cross-compiling, a signed shift is assumed.
+dnl The program pulls in stdio.h and returns its status from an explicitly
+dnl typed main, so compilers that reject implicit function declarations
+dnl or implicit int still build it.  A compile failure would count as a
+dnl failed run and report a signed shift without testing anything.
+AC_MSG_CHECKING([to see if right shift is signed])
+AC_TRY_RUN([
+#include <stdio.h>
+#ifdef HAVE_PROTOTYPES
+int is_shifting_signed (long arg)
+#else
+int is_shifting_signed (arg)
+     long arg;
+#endif
+/* See whether right-shift on a long is signed or not. */
+{
+  long res = arg >> 4;
+
+  if (res == -0x7F7E80CL) {    /* expected result for signed shift */
+    return 1;                  /* right shift is signed */
+  }
+  /* see if unsigned-shift hack will fix it. */
+  /* we can't just test exact value since it depends on width of long... */
+  res |= (~0L) << (32-4);
+  if (res == -0x7F7E80CL) {    /* expected result now? */
+    return 0;                  /* right shift is unsigned */
+  }
+  printf("Right shift isn't acting as I expect it to.\n");
+  printf("I fear the JPEG software will not work at all.\n\n");
+  return 0;                    /* try it with unsigned anyway */
+}
+int main() {
+  return is_shifting_signed(-0x7F7E80B1L);
+}],[AC_MSG_RESULT(no)
+AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED],)],[AC_MSG_RESULT(yes)],
+[AC_MSG_RESULT([Assuming that right shift is signed on target machine.])])
+dnl --------------------------------------------------------------------
+dnl Run a program that opens "conftestdata" with mode "wb".  If fopen
+dnl returns NULL, or the program cannot be built or run, report "no" and
+dnl define DONT_USE_B_MODE.  When cross compiling, "b" is assumed to work.
+dnl
+dnl main is declared int and returns its status instead of calling an
+dnl undeclared exit.  Otherwise a compiler that rejects implicit int or
+dnl implicit function declarations fails to build the test, and binary
+dnl mode gets disabled even though fopen supports it.
+AC_MSG_CHECKING([to see if fopen accepts b spec])
+AC_TRY_RUN([
+#include <stdio.h>
+int main() {
+  if (fopen("conftestdata", "wb") != NULL)
+    return 0;
+  return 1;
+}],[AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)
+AC_DEFINE([DONT_USE_B_MODE],)],[AC_MSG_RESULT([Assuming that it does.])])
+dnl --------------------------------------------------------------------
+AC_PROG_INSTALL
+AC_PROG_RANLIB
+dnl --------------------------------------------------------------------
+
+AC_CANONICAL_HOST
+AC_EXEEXT
+
+# Decide whether to use libtool,
+# and if so whether to build shared, static, or both flavors of library.
+AC_DISABLE_SHARED
+AC_DISABLE_STATIC
+# libtool is used as soon as either flavor is switched on; otherwise the
+# variables below fall back to the plain (non-libtool) commands and to the
+# o/a file suffixes.
+if test "x$enable_shared" != xno  -o  "x$enable_static" != xno; then
+  USELIBTOOL="yes"
+# LIBTOOL="./libtool"
+  # O and A are the object-file and library suffixes used in the makefile.
+  O="lo"
+  A="la"
+  # The single-quoted values are make expressions, expanded by make and
+  # not by this script.
+  LN='$(LIBTOOL) --mode=link $(CC)'
+  INSTALL_LIB='$(LIBTOOL) --mode=install ${INSTALL}'
+  INSTALL_PROGRAM="\$(LIBTOOL) --mode=install $INSTALL_PROGRAM"
+  UNINSTALL='$(LIBTOOL) --mode=uninstall $(RM)'
+else
+  USELIBTOOL="no"
+  LIBTOOL=""
+  O="o"
+  A="a"
+  LN='$(CC)'
+  INSTALL_LIB="$INSTALL_DATA"
+  UNINSTALL='$(RM)'
+fi
+AC_SUBST([LIBTOOL])
+AC_SUBST([O])
+AC_SUBST([A])
+AC_SUBST([LN])
+AC_SUBST([INSTALL_LIB])
+AC_SUBST([UNINSTALL])
+
+# Configure libtool if needed.
+if test $USELIBTOOL = yes; then
+  AC_LIBTOOL_DLOPEN
+  AC_LIBTOOL_WIN32_DLL
+  AC_PROG_LIBTOOL
+fi
+# if libtool >= 1.5
+dnl TAGCC becomes --tag=CC only when the AC_LIBTOOL_GCJ macro is defined at
+dnl autoconf time; otherwise it is empty.
+TAGCC=ifdef([AC_LIBTOOL_GCJ],[--tag=CC])
+AC_SUBST([TAGCC])
+
+dnl --------------------------------------------------------------------
+# Select memory manager depending on user input.
+# If no "-enable-maxmem", use jmemnobs
+MEMORYMGR='jmemnobs.$(O)'
+MAXMEM="no"
+AC_ARG_ENABLE([maxmem],
+[  --enable-maxmem[=N]     enable use of temp files, set max mem usage to N MB],
+[MAXMEM="$enableval"])
+# support --with-maxmem for backwards compatibility with IJG V5.
+AC_ARG_WITH([maxmem],,[MAXMEM="$withval"])
+# A bare --enable-maxmem arrives as "yes" and is treated as 1 MB.
+if test "x$MAXMEM" = xyes; then
+  MAXMEM=1
+fi
+if test "x$MAXMEM" != xno; then
+  # Anything left over after deleting every digit means the value is not
+  # a plain unsigned integer.
+  if test -n "`echo $MAXMEM | sed 's/[[0-9]]//g'`"; then
+    AC_MSG_ERROR([non-numeric argument to --enable-maxmem])
+  fi
+  # DEFAULT_MAX_MEM is the megabyte count converted to bytes.
+  DEFAULTMAXMEM=`expr $MAXMEM \* 1048576`
+AC_DEFINE_UNQUOTED([DEFAULT_MAX_MEM],[${DEFAULTMAXMEM}])
+# Use jmemansi when tmpfile links.  Otherwise use jmemname, define
+# NEED_SIGNAL_CATCHER, and define NO_MKTEMP if mktemp does not link either.
+AC_MSG_CHECKING([for 'tmpfile()'])
+AC_TRY_LINK([#include <stdio.h>],[ FILE * tfile = tmpfile(); ],
+[AC_MSG_RESULT(yes)
+MEMORYMGR='jmemansi.$(O)'],
+[AC_MSG_RESULT(no)
+MEMORYMGR='jmemname.$(O)'
+AC_DEFINE([NEED_SIGNAL_CATCHER],)
+AC_MSG_CHECKING([for 'mktemp()'])
+AC_TRY_LINK(,[ char fname[80]; mktemp(fname); ],
+[AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)
+AC_DEFINE([NO_MKTEMP],)])])
+fi
+AC_SUBST([MEMORYMGR])
+
+dnl ====================================================================
+
+# Abort configuration unless the host CPU is 32-bit x86.  64-bit x86 hosts
+# get their own error message; every other CPU gets the generic one.
+# NOTE(review): "aa64" in the 64-bit pattern does not look like a usual CPU
+# name; confirm which host it is meant to match.
+AC_MSG_CHECKING([to see if the host cpu type is i386 or compatible])
+case "$host_cpu" in
+  i*86 | x86 | ia32)
+    AC_MSG_RESULT(yes)
+  ;;
+  x86_64 | amd64 | aa64)
+    AC_MSG_RESULT([no (x86_64)])
+    AC_MSG_ERROR([Currently, this version of JPEG library cannot be compiled as 64-bit code. sorry.])
+  ;;
+  *)
+    AC_MSG_RESULT([no ("$host_cpu")])
+    AC_MSG_ERROR([This version of JPEG library is for i386 or compatible processors only.])
+  ;;
+esac
+
+# Guess the object file format from the host OS, unless the user already
+# supplied NAFLAGS.  In that case objfmt stays empty, no entry of the
+# second case statement matches, and NAFLAGS is used as given.
+if test -z "$NAFLAGS" ; then
+  AC_MSG_CHECKING([for object file format of host system])
+  case "$host_os" in
+    cygwin* | mingw* | pw32* | interix*)
+      objfmt='Win32-COFF'
+    ;;
+    msdosdjgpp* | go32*)
+      objfmt='COFF'
+    ;;
+    os2-emx*)                  # not tested
+      objfmt='MSOMF'           # obj
+    ;;
+    linux*coff* | linux*oldld*)
+      objfmt='COFF'            # ???
+    ;;
+    linux*aout*)
+      objfmt='a.out'
+    ;;
+    linux*)
+      objfmt='ELF'
+    ;;
+    freebsd* | netbsd* | openbsd*)
+      # If the compiler predefines __ELF__, the preprocessor replaces the
+      # token and grep finds nothing.  A surviving __ELF__ therefore means
+      # the toolchain is not ELF, and a.out is assumed.
+      if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+        objfmt='BSD-a.out'
+      else
+        objfmt='ELF'
+      fi
+    ;;
+    solaris* | sunos* | sysv* | sco*)
+      objfmt='ELF'
+    ;;
+    darwin* | rhapsody* | nextstep* | openstep* | macos*)
+      objfmt='Mach-O'
+    ;;
+    *)
+      objfmt='ELF ?'
+    ;;
+  esac
+  AC_MSG_RESULT([$objfmt])
+  # 'ELF ?' exists only so the result line shows the guess was uncertain;
+  # from here on it is handled as plain ELF, with a warning.
+  if test "$objfmt" = 'ELF ?'; then
+    objfmt='ELF'
+    AC_MSG_WARN([unexpected host system. assumed that the format is $objfmt.])
+  fi
+else
+  objfmt=''
+fi
+# Map the detected format to the assembler output-format option (-f...)
+# and a matching -D symbol.  No branch above assigns RDF, so the visible
+# code never selects that entry.
+AC_MSG_CHECKING([for object file format specifier (NAFLAGS) ])
+case "$objfmt" in
+  MSOMF)      NAFLAGS='-fobj -DOBJ32';;
+  Win32-COFF) NAFLAGS='-fwin32 -DWIN32';;
+  COFF)       NAFLAGS='-fcoff -DCOFF';;
+  a.out)      NAFLAGS='-faout -DAOUT';;
+  BSD-a.out)  NAFLAGS='-faoutb -DAOUT';;
+  ELF)        NAFLAGS='-felf -DELF';;
+  RDF)        NAFLAGS='-frdf -DRDF';;
+  Mach-O)     NAFLAGS='-fmacho -DMACHO';;
+esac
+AC_MSG_RESULT([$NAFLAGS])
+AC_SUBST([NAFLAGS])
+
+dnl --------------------------------------------------------------------
+
+# Look for an assembler named nasm, then nasmw, and abort if none is found.
+# NOTE(review): the search list never yields yasm, so the warning below
+# presumably fires only when NASM is preset by the user; confirm.
+AC_CHECK_PROGS(NASM, [nasm nasmw])
+test -z "$NASM" && AC_MSG_ERROR([no nasm (Netwide Assembler) found in \$PATH])
+if echo "$NASM" | grep yasm > /dev/null; then
+  AC_MSG_WARN([DON'T USE YASM! CURRENT VERSION (R0.4.0) IS BUGGY!])
+fi
+
+# Assemble a minimal 32-bit program and then link it with the C compiler.
+# The source exports both _main and main, presumably so that it links
+# whether or not the platform prefixes C symbols with an underscore.
+AC_MSG_CHECKING([whether the assembler ($NASM $NAFLAGS) works])
+cat > conftest.asm <<EOF
+[%line __oline__ "configure"
+        section .text
+        bits    32
+        global  _main,main
+_main:
+main:   xor     eax,eax
+        ret
+]EOF
+try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm'
+# On success conftest.o is kept, because the link test below consumes it.
+if AC_TRY_EVAL(try_nasm) && test -s conftest.o; then
+  AC_MSG_RESULT(yes)
+else
+  echo "configure: failed program was:" >&AC_FD_CC
+  cat conftest.asm >&AC_FD_CC
+  rm -rf conftest*
+  AC_MSG_RESULT(no)
+  AC_MSG_ERROR([installation or configuration problem: assembler cannot create object files.])
+fi
+# A link failure here most likely means NAFLAGS selected an object format
+# that the linker does not accept.
+AC_MSG_CHECKING([whether the linker accepts assembler output])
+try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&AC_FD_CC'
+if AC_TRY_EVAL(try_nasm) && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  AC_MSG_RESULT(yes)
+else
+  rm -rf conftest*
+  AC_MSG_RESULT(no)
+  AC_MSG_ERROR([configuration problem: maybe object file format mismatch.])
+fi
+
+# Assemble a source line that is split with a trailing backslash.  The
+# here-document delimiter is quoted (\EOF) so that the shell passes the
+# backslash through to conftest.asm unchanged.
+AC_MSG_CHECKING([whether the assembler supports line continuation character])
+cat > conftest.asm <<\EOF
+[%line __oline__ "configure"
+; The line continuation character '\'
+; was introduced in nasm 0.98.25.
+        section .text
+        bits    32
+        global  _zero
+_zero:  xor     \
+                eax,eax
+        ret
+]EOF
+try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm'
+if AC_TRY_EVAL(try_nasm) && test -s conftest.o; then
+  rm -rf conftest*
+  AC_MSG_RESULT(yes)
+else
+  echo "configure: failed program was:" >&AC_FD_CC
+  cat conftest.asm >&AC_FD_CC
+  rm -rf conftest*
+  AC_MSG_RESULT(no)
+  AC_MSG_ERROR([you have to use a more recent version of the assembler.])
+fi
+
+dnl --------------------------------------------------------------------
+
+# Build the list of SIMD instruction sets to use.  Every set is on by
+# default.  Passing --disable-NAME leaves that set out of the list and
+# defines the matching JSIMD_..._NOT_SUPPORTED symbol instead.
+AC_MSG_CHECKING([SIMD instruction sets requested to use])
+simd_to_use=""
+
+AC_ARG_ENABLE(mmx,
+[  --disable-mmx           do not use MMX instruction set],
+[if test "x$enableval" = xno; then
+  AC_DEFINE([JSIMD_MMX_NOT_SUPPORTED],)
+else
+  simd_to_use="$simd_to_use MMX"
+fi], [simd_to_use="$simd_to_use MMX"])
+
+AC_ARG_ENABLE(3dnow,
+[  --disable-3dnow         do not use 3DNow! instruction set],
+[if test "x$enableval" = xno; then
+  AC_DEFINE([JSIMD_3DNOW_NOT_SUPPORTED],)
+else
+  simd_to_use="$simd_to_use 3DNow!"
+fi], [simd_to_use="$simd_to_use 3DNow!"])
+
+AC_ARG_ENABLE(sse,
+[  --disable-sse           do not use SSE instruction set],
+[if test "x$enableval" = xno; then
+  AC_DEFINE([JSIMD_SSE_NOT_SUPPORTED],)
+else
+  simd_to_use="$simd_to_use SSE"
+fi], [simd_to_use="$simd_to_use SSE"])
+
+AC_ARG_ENABLE(sse2,
+[  --disable-sse2          do not use SSE2 instruction set],
+[if test "x$enableval" = xno; then
+  AC_DEFINE([JSIMD_SSE2_NOT_SUPPORTED],)
+else
+  simd_to_use="$simd_to_use SSE2"
+fi], [simd_to_use="$simd_to_use SSE2"])
+
+# An empty list (everything disabled) is reported as NONE.
+test -z "$simd_to_use" && simd_to_use="NONE"
+AC_MSG_RESULT([$simd_to_use])
+
+# For every requested instruction set, assemble one representative
+# instruction to make sure the assembler knows it.  NONE, and any other
+# unrecognized word, is skipped by the default branch.
+for simd_name in $simd_to_use; do
+case "$simd_name" in
+  MMX)    simd_instruction='psubw mm0,mm0';;
+  3DNow!) simd_instruction='pfsub mm0,mm0';;
+  SSE)    simd_instruction='subps xmm0,xmm0';;
+  SSE2)   simd_instruction='subpd xmm0,xmm0';;
+  *)      continue;;
+esac
+AC_MSG_CHECKING([whether the assembler supports $simd_name instructions])
+# The here-document is unquoted so that $simd_instruction is expanded
+# into the test source.
+cat > conftest.asm <<EOF
+[%line __oline__ "configure"
+        section .text
+        bits    32
+        global  _simd
+_simd:  $simd_instruction
+        ret
+]EOF
+try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm'
+if AC_TRY_EVAL(try_nasm) && test -s conftest.o; then
+  rm -rf conftest*
+  AC_MSG_RESULT(yes)
+else
+  echo "configure: failed program was:" >&AC_FD_CC
+  cat conftest.asm >&AC_FD_CC
+  rm -rf conftest*
+  AC_MSG_RESULT(no)
+  AC_MSG_ERROR([you have to use a more recent version of the assembler.])
+fi
+done
+
+dnl --------------------------------------------------------------------
+# Select OS-dependent SIMD instruction support checker.
+# jsimdw32.$(O) (Win32) / jsimddjg.$(O) (DJGPP V.2) / jsimdgcc.$(O) (Unix/gcc)
+# A SIMDCHECKER value that is already set when configure runs is kept.
+if test "x$SIMDCHECKER" = x ; then
+  case "$host_os" in
+    cygwin* | mingw* | pw32* | interix*)
+      SIMDCHECKER='jsimdw32.$(O)'
+    ;;
+    msdosdjgpp* | go32*)
+      SIMDCHECKER='jsimddjg.$(O)'
+    ;;
+    os2-emx*)                  # not tested
+      SIMDCHECKER='jsimdgcc.$(O)'
+    ;;
+    *)
+      SIMDCHECKER='jsimdgcc.$(O)'
+    ;;
+  esac
+fi
+AC_SUBST([SIMDCHECKER])
+
+# Define USE_SETMODE on the Windows-, OS/2- and DOS-type hosts listed
+# below.  The commented-out branch is a placeholder for hosts that would
+# need USE_FDOPEN instead.
+case "$host_os" in
+  cygwin* | mingw* | pw32* | os2-emx* | msdosdjgpp* | go32*)
+    AC_DEFINE([USE_SETMODE],)
+  ;;
+# _host_name_*)
+#   AC_DEFINE([USE_FDOPEN],)
+# ;;
+esac
+
+# This is for UNIX-like environments on Windows platform.
+# TYPEDEF_UCHAR_BOOLEAN is defined only when the option is given and its
+# value is not "no"; nothing is defined by default.
+AC_ARG_ENABLE(uchar-boolean,
+[  --enable-uchar-boolean  define type \"boolean\" as unsigned char (for Windows)],
+[if test "x$enableval" != xno; then
+  AC_DEFINE([TYPEDEF_UCHAR_BOOLEAN],)
+fi])
+
+dnl --------------------------------------------------------------------
+
+# Library version number handed to libtool.  The value below is the
+# default.  The first readable config.ver found in the source directory,
+# its parent or its grandparent is sourced, and presumably assigns
+# JPEG_LIB_VERSION itself -- TODO confirm against config.ver.
+JPEG_LIB_VERSION="63:0:1"
+confv_dirs="$srcdir $srcdir/.. $srcdir/../.."
+config_ver=
+for ac_dir in $confv_dirs; do
+  if test -r $ac_dir/config.ver; then
+    config_ver=$ac_dir/config.ver
+    break
+  fi
+done
+if test -z "$config_ver"; then
+  AC_MSG_WARN([cannot find config.ver in $confv_dirs])
+  AC_MSG_WARN([default version number $JPEG_LIB_VERSION is used])
+  AC_MSG_CHECKING([libjpeg version number for libtool])
+  AC_MSG_RESULT([$JPEG_LIB_VERSION])
+else
+  AC_MSG_CHECKING([libjpeg version number for libtool])
+  . $config_ver
+  AC_MSG_RESULT([$JPEG_LIB_VERSION])
+  echo "configure: if you want to change the version number, modify $config_ver" 1>&2
+fi
+AC_SUBST([JPEG_LIB_VERSION])
+
+dnl --------------------------------------------------------------------
+# Prepare to massage makefile.cfg correctly.
+# COM_A2K and COM_LT are each either empty or "# "; presumably they are
+# placed in front of makefile.cfg lines to comment those lines out --
+# confirm against makefile.cfg.
+if test $ijg_cv_have_prototypes = yes; then
+  A2K_DEPS=""
+  COM_A2K="# "
+else
+  A2K_DEPS="ansi2knr"
+  COM_A2K=""
+fi
+AC_SUBST([A2K_DEPS])
+AC_SUBST([COM_A2K])
+# ansi2knr needs -DBSD if string.h is missing
+if test $ac_cv_header_string_h = no; then
+  ANSI2KNRFLAGS="-DBSD"
+else
+  ANSI2KNRFLAGS=""
+fi
+AC_SUBST([ANSI2KNRFLAGS])
+# Substitutions to enable or disable libtool-related stuff
+# (enabled only when libtool is in use and the compiler has prototypes)
+if test $USELIBTOOL = yes -a $ijg_cv_have_prototypes = yes; then
+  COM_LT=""
+else
+  COM_LT="# "
+fi
+AC_SUBST([COM_LT])
+# The install-lib / uninstall-lib names are substituted only for a shared
+# build; otherwise both variables are empty.
+if test "x$enable_shared" != xno; then
+  FORCE_INSTALL_LIB="install-lib"
+  UNINSTALL_LIB="uninstall-lib"
+else
+  FORCE_INSTALL_LIB=""
+  UNINSTALL_LIB=""
+fi
+AC_SUBST([FORCE_INSTALL_LIB])
+AC_SUBST([UNINSTALL_LIB])
+# Set up -I directives
+# (when srcdir is not ".", the build directory is searched as well)
+if test "x$srcdir" = x.; then
+  INCLUDEFLAGS='-I$(srcdir)'
+else
+  INCLUDEFLAGS='-I. -I$(srcdir)'
+fi
+AC_SUBST([INCLUDEFLAGS])
+dnl --------------------------------------------------------------------
+dnl Generate Makefile from the template makefile.cfg.
+AC_OUTPUT([Makefile:makefile.cfg])
diff --git a/djpeg.c b/djpeg.c
index e099e90aee35fa7e092e3909ee351fe8831ac05c..a1ec059ec151c96b48806f172f434b5f9ac5d9a6 100644 (file)
--- a/djpeg.c
+++ b/djpeg.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : August 23, 2005
+ * ---------------------------------------------------------------------
+ *
  * This file contains a command-line user interface for the JPEG decompressor.
  * It should work on any system with Unix- or MS-DOS-style command lines.
  *
@@ -158,6 +165,22 @@ usage (void)
 }
 
 
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+/*
+ * Write one report line to file: labelstr, then the name of every SIMD
+ * instruction set whose flag is set in simd (or " NONE" when simd equals
+ * JSIMD_NONE), then a newline.
+ */
+LOCAL(void)
+print_simd_info (FILE * file, char * labelstr, unsigned int simd)
+{
+  /* Each tag stays empty unless the corresponding condition holds. */
+  const char * mmx_tag   = "";
+  const char * tdnow_tag = "";
+  const char * sse_tag   = "";
+  const char * sse2_tag  = "";
+  const char * none_tag  = "";
+
+  if (simd & JSIMD_MMX)
+    mmx_tag = " MMX";
+  if (simd & JSIMD_3DNOW)
+    tdnow_tag = " 3DNow!";
+  if (simd & JSIMD_SSE)
+    sse_tag = " SSE";
+  if (simd & JSIMD_SSE2)
+    sse2_tag = " SSE2";
+  if (simd == JSIMD_NONE)
+    none_tag = " NONE";
+
+  fprintf(file, "%s%s%s%s%s%s\n", labelstr,
+         mmx_tag, tdnow_tag, sse_tag, sse2_tag, none_tag);
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
+
+
 LOCAL(int)
 parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
                int last_file_arg_seen, boolean for_real)
@@ -208,6 +231,19 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
       cinfo->desired_number_of_colors = val;
       cinfo->quantize_colors = TRUE;
 
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+    } else if (keymatch(arg, "nosimd" , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL);
+    } else if (keymatch(arg, "nommx"  , 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX);
+    } else if (keymatch(arg, "no3dnow", 3)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW);
+    } else if (keymatch(arg, "nosse"  , 4)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE);
+    } else if (keymatch(arg, "nosse2" , 6)) {
+      jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2);
+#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */
+
     } else if (keymatch(arg, "dct", 2)) {
       /* Select IDCT algorithm. */
       if (++argn >= argc)      /* advance to next argument */
@@ -242,6 +278,38 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
       if (! printed_version) {
        fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n",
                JVERSION, JCOPYRIGHT);
+       fprintf(stderr,
+               "\nx86 SIMD extension for IJG JPEG library, version %s\n\n",
+               JPEG_SIMDEXT_VER_STR);
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+       print_simd_info(stderr, "SIMD instructions supported by the system :",
+                       jpeg_simd_support(NULL));
+
+       fprintf(stderr, "\n      === SIMD Operation Modes ===\n");
+#ifdef DCT_ISLOW_SUPPORTED
+       print_simd_info(stderr, "Accurate integer DCT  (-dct int)   :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_ISLOW));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+       print_simd_info(stderr, "Fast integer DCT      (-dct fast)  :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_IFAST));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+       print_simd_info(stderr, "Floating-point DCT    (-dct float) :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT));
+#endif
+#ifdef IDCT_SCALING_SUPPORTED
+       print_simd_info(stderr, "Reduced-size DCT      (-scale M/N) :",
+                       jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT+1));
+#endif
+       print_simd_info(stderr, "High-quality upsampling (default)  :",
+                       jpeg_simd_upsampler(cinfo, TRUE));
+       print_simd_info(stderr, "Low-quality upsampling (-nosmooth) :",
+                       jpeg_simd_upsampler(cinfo, FALSE));
+       print_simd_info(stderr, "Colorspace conversion (YCbCr->RGB) :",
+                       jpeg_simd_color_deconverter(cinfo));
+       fprintf(stderr, "\n");
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
index e8436696c19d1bfd9e4d53c7113deb6a991818a8..4d4a9519eaf88b18fb157dfe5fae59c1c5d005c7 100755 (executable)
@@ -1,19 +1,38 @@
 #!/bin/sh
-#
 # install - install a program, script, or datafile
-# This comes from X11R5 (mit/util/scripts/install.sh).
+
+scriptversion=2005-05-14.22
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
 #
-# Copyright 1991 by the Massachusetts Institute of Technology
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #
-# Permission to use, copy, modify, distribute, and sell this software and its
-# documentation for any purpose is hereby granted without fee, provided that
-# the above copyright notice appear in all copies and that both that
-# copyright notice and this permission notice appear in supporting
-# documentation, and that the name of M.I.T. not be used in advertising or
-# publicity pertaining to distribution of the software without specific,
-# written prior permission.  M.I.T. makes no representations about the
-# suitability of this software for any purpose.  It is provided "as is"
-# without express or implied warranty.
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
 #
 # Calling this script install-sh is preferred over install.sh, to prevent
 # `make' implicit rules from creating a file called install from it
 # from scratch.  It can only install one file at a time, a restriction
 # shared with many OS's install programs.
 
-
 # set DOITPROG to echo to test this script
 
 # Don't use :- since 4.3BSD and earlier shells don't like it.
 doit="${DOITPROG-}"
 
-
 # put in absolute paths if you don't have them in your path; or use env. vars.
 
 mvprog="${MVPROG-mv}"
@@ -41,210 +58,266 @@ stripprog="${STRIPPROG-strip}"
 rmprog="${RMPROG-rm}"
 mkdirprog="${MKDIRPROG-mkdir}"
 
-transformbasename=""
-transform_arg=""
-instcmd="$mvprog"
 chmodcmd="$chmodprog 0755"
-chowncmd=""
-chgrpcmd=""
-stripcmd=""
+chowncmd=
+chgrpcmd=
+stripcmd=
 rmcmd="$rmprog -f"
 mvcmd="$mvprog"
-src=""
-dst=""
-dir_arg=""
-
-while [ x"$1" != x ]; do
-    case $1 in
-       -c) instcmd="$cpprog"
-           shift
-           continue;;
-
-       -d) dir_arg=true
-           shift
-           continue;;
-
-       -m) chmodcmd="$chmodprog $2"
-           shift
-           shift
-           continue;;
-
-       -o) chowncmd="$chownprog $2"
-           shift
-           shift
-           continue;;
-
-       -g) chgrpcmd="$chgrpprog $2"
-           shift
-           shift
-           continue;;
-
-       -s) stripcmd="$stripprog"
-           shift
-           continue;;
-
-       -t=*) transformarg=`echo $1 | sed 's/-t=//'`
-           shift
-           continue;;
-
-       -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
-           shift
-           continue;;
-
-       *)  if [ x"$src" = x ]
-           then
-               src=$1
-           else
-               # this colon is to work around a 386BSD /bin/sh bug
-               :
-               dst=$1
-           fi
-           shift
-           continue;;
-    esac
-done
-
-if [ x"$src" = x ]
-then
-       echo "install:  no input file specified"
-       exit 1
-else
-       true
-fi
-
-if [ x"$dir_arg" != x ]; then
-       dst=$src
-       src=""
-       
-       if [ -d $dst ]; then
-               instcmd=:
-       else
-               instcmd=mkdir
-       fi
-else
-
-# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
-# might cause directories to be created, which would be especially bad 
-# if $src (and thus $dsttmp) contains '*'.
-
-       if [ -f $src -o -d $src ]
-       then
-               true
-       else
-               echo "install:  $src does not exist"
-               exit 1
-       fi
-       
-       if [ x"$dst" = x ]
-       then
-               echo "install:  no destination specified"
-               exit 1
-       else
-               true
-       fi
-
-# If destination is a directory, append the input filename; if your system
-# does not like double slashes in filenames, you may need to add some logic
-
-       if [ -d $dst ]
-       then
-               dst="$dst"/`basename $src`
-       else
-               true
-       fi
-fi
-
-## this sed command emulates the dirname command
-dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
-
-# Make sure that the destination directory exists.
-#  this part is taken from Noah Friedman's mkinstalldirs script
-
-# Skip lots of stat calls in the usual case.
-if [ ! -d "$dstdir" ]; then
-defaultIFS='   
-'
-IFS="${IFS-${defaultIFS}}"
-
-oIFS="${IFS}"
-# Some sh's can't handle IFS=/ for some reason.
-IFS='%'
-set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
-IFS="${oIFS}"
-
-pathcomp=''
-
-while [ $# -ne 0 ] ; do
-       pathcomp="${pathcomp}${1}"
+src=
+dst=
+dir_arg=
+dstarg=
+no_target_directory=
+
+usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+   or: $0 [OPTION]... SRCFILES... DIRECTORY
+   or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+   or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+-c         (ignored)
+-d         create directories instead of installing files.
+-g GROUP   $chgrpprog installed files to GROUP.
+-m MODE    $chmodprog installed files to MODE.
+-o USER    $chownprog installed files to USER.
+-s         $stripprog installed files.
+-t DIRECTORY  install into DIRECTORY.
+-T         report an error if DSTFILE is a directory.
+--help     display this help and exit.
+--version  display version info and exit.
+
+Environment variables override the default commands:
+  CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
+"
+
+while test -n "$1"; do
+  case $1 in
+    -c) shift
+        continue;;
+
+    -d) dir_arg=true
+        shift
+        continue;;
+
+    -g) chgrpcmd="$chgrpprog $2"
+        shift
+        shift
+        continue;;
+
+    --help) echo "$usage"; exit $?;;
+
+    -m) chmodcmd="$chmodprog $2"
+        shift
+        shift
+        continue;;
+
+    -o) chowncmd="$chownprog $2"
+        shift
+        shift
+        continue;;
+
+    -s) stripcmd=$stripprog
+        shift
+        continue;;
+
+    -t) dstarg=$2
        shift
+       shift
+       continue;;
 
-       if [ ! -d "${pathcomp}" ] ;
-        then
-               $mkdirprog "${pathcomp}"
-       else
-               true
-       fi
-
-       pathcomp="${pathcomp}/"
+    -T) no_target_directory=true
+       shift
+       continue;;
+
+    --version) echo "$0 $scriptversion"; exit $?;;
+
+    *)  # When -d is used, all remaining arguments are directories to create.
+       # When -t is used, the destination is already specified.
+       test -n "$dir_arg$dstarg" && break
+        # Otherwise, the last argument is the destination.  Remove it from $@.
+       for arg
+       do
+          if test -n "$dstarg"; then
+           # $@ is not empty: it contains at least $arg.
+           set fnord "$@" "$dstarg"
+           shift # fnord
+         fi
+         shift # arg
+         dstarg=$arg
+       done
+       break;;
+  esac
 done
-fi
-
-if [ x"$dir_arg" != x ]
-then
-       $doit $instcmd $dst &&
-
-       if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
-       if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
-       if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
-       if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
-else
-
-# If we're going to rename the final executable, determine the name now.
-
-       if [ x"$transformarg" = x ] 
-       then
-               dstfile=`basename $dst`
-       else
-               dstfile=`basename $dst $transformbasename | 
-                       sed $transformarg`$transformbasename
-       fi
-
-# don't allow the sed command to completely eliminate the filename
-
-       if [ x"$dstfile" = x ] 
-       then
-               dstfile=`basename $dst`
-       else
-               true
-       fi
-
-# Make a temp file name in the proper directory.
-
-       dsttmp=$dstdir/#inst.$$#
 
-# Move or copy the file name to the temp name
-
-       $doit $instcmd $src $dsttmp &&
-
-       trap "rm -f ${dsttmp}" 0 &&
-
-# and set any options; do chmod last to preserve setuid bits
-
-# If any of these fail, we abort the whole thing.  If we want to
-# ignore errors from any of these, just make sure not to ignore
-# errors from the above "$doit $instcmd $src $dsttmp" command.
-
-       if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
-       if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
-       if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
-       if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
-
-# Now rename the file to the real destination.
-
-       $doit $rmcmd -f $dstdir/$dstfile &&
-       $doit $mvcmd $dsttmp $dstdir/$dstfile 
+if test -z "$1"; then
+  if test -z "$dir_arg"; then
+    echo "$0: no input file specified." >&2
+    exit 1
+  fi
+  # It's OK to call `install-sh -d' without argument.
+  # This can happen when creating conditional directories.
+  exit 0
+fi
 
-fi &&
+for src
+do
+  # Protect names starting with `-'.
+  case $src in
+    -*) src=./$src ;;
+  esac
+
+  if test -n "$dir_arg"; then
+    dst=$src
+    src=
+
+    if test -d "$dst"; then
+      mkdircmd=:
+      chmodcmd=
+    else
+      mkdircmd=$mkdirprog
+    fi
+  else
+    # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+    # might cause directories to be created, which would be especially bad
+    # if $src (and thus $dsttmp) contains '*'.
+    if test ! -f "$src" && test ! -d "$src"; then
+      echo "$0: $src does not exist." >&2
+      exit 1
+    fi
+
+    if test -z "$dstarg"; then
+      echo "$0: no destination specified." >&2
+      exit 1
+    fi
+
+    dst=$dstarg
+    # Protect names starting with `-'.
+    case $dst in
+      -*) dst=./$dst ;;
+    esac
 
+    # If destination is a directory, append the input filename; won't work
+    # if double slashes aren't ignored.
+    if test -d "$dst"; then
+      if test -n "$no_target_directory"; then
+       echo "$0: $dstarg: Is a directory" >&2
+       exit 1
+      fi
+      dst=$dst/`basename "$src"`
+    fi
+  fi
+
+  # This sed command emulates the dirname command.
+  dstdir=`echo "$dst" | sed -e 's,/*$,,;s,[^/]*$,,;s,/*$,,;s,^$,.,'`
+
+  # Make sure that the destination directory exists.
+
+  # Skip lots of stat calls in the usual case.
+  if test ! -d "$dstdir"; then
+    defaultIFS='
+        '
+    IFS="${IFS-$defaultIFS}"
+
+    oIFS=$IFS
+    # Some sh's can't handle IFS=/ for some reason.
+    IFS='%'
+    set x `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
+    shift
+    IFS=$oIFS
+
+    pathcomp=
+
+    while test $# -ne 0 ; do
+      pathcomp=$pathcomp$1
+      shift
+      if test ! -d "$pathcomp"; then
+        $mkdirprog "$pathcomp"
+       # mkdir can fail with a `File exist' error in case several
+       # install-sh are creating the directory concurrently.  This
+       # is OK.
+       test -d "$pathcomp" || exit
+      fi
+      pathcomp=$pathcomp/
+    done
+  fi
+
+  if test -n "$dir_arg"; then
+    $doit $mkdircmd "$dst" \
+      && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
+      && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
+      && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
+      && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; }
+
+  else
+    dstfile=`basename "$dst"`
+
+    # Make a couple of temp file names in the proper directory.
+    dsttmp=$dstdir/_inst.$$_
+    rmtmp=$dstdir/_rm.$$_
+
+    # Trap to clean up those temp files at exit.
+    trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+    trap '(exit $?); exit' 1 2 13 15
+
+    # Copy the file name to the temp name.
+    $doit $cpprog "$src" "$dsttmp" &&
+
+    # and set any options; do chmod last to preserve setuid bits.
+    #
+    # If any of these fail, we abort the whole thing.  If we want to
+    # ignore errors from any of these, just make sure not to ignore
+    # errors from the above "$doit $cpprog $src $dsttmp" command.
+    #
+    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
+      && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
+      && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
+      && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
+
+    # Now rename the file to the real destination.
+    { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \
+      || {
+          # The rename failed, perhaps because mv can't rename something else
+          # to itself, or perhaps because mv is so ancient that it does not
+          # support -f.
+
+          # Now remove or move aside any old file at destination location.
+          # We try this two ways since rm can't unlink itself on some
+          # systems and the destination file might be busy for other
+          # reasons.  In this case, the final cleanup might fail but the new
+          # file should still install successfully.
+          {
+            if test -f "$dstdir/$dstfile"; then
+              $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
+              || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
+              || {
+                echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
+                (exit 1); exit 1
+              }
+            else
+              :
+            fi
+          } &&
+
+          # Now rename the file to the real destination.
+          $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
+        }
+    }
+  fi || { (exit 1); exit 1; }
+done
 
-exit 0
+# The final little trick to "correctly" pass the exit status to the exit trap.
+{
+  (exit 0); exit 0
+}
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
diff --git a/jccolmmx.asm b/jccolmmx.asm
new file mode 100644 (file)
index 0000000..2e2fca6
--- /dev/null
@@ -0,0 +1,513 @@
+;
+; jccolmmx.asm - colorspace conversion (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+%ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED
+
+; --------------------------------------------------------------------------
+
+; Fixed-point coefficients: FIX(x) is x scaled by 2^SCALEBITS and rounded
+; to the nearest integer (e.g. 0.29900 * 65536 = 19595.26 -> 19595).
+%define SCALEBITS      16
+
+F_0_081        equ      5329                   ; FIX(0.08131)
+F_0_114        equ      7471                   ; FIX(0.11400)
+F_0_168        equ     11059                   ; FIX(0.16874)
+F_0_250        equ     16384                   ; FIX(0.25000)
+F_0_299        equ     19595                   ; FIX(0.29900)
+F_0_331        equ     21709                   ; FIX(0.33126)
+F_0_418        equ     27439                   ; FIX(0.41869)
+F_0_587        equ     38470                   ; FIX(0.58700)
+; F_0_587 (38470) does not fit in a signed 16-bit word (max 32767), and the
+; tables below are consumed by pmaddwd, which multiplies signed words.  The
+; green weight for Y is therefore split into 0.337 + 0.250, each of which
+; fits, and the two partial products are summed afterwards.
+F_0_337        equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_rgb_ycc_convert_mmx)
+
+EXTN(jconst_rgb_ycc_convert_mmx):
+
+; Each PW_* row is a pair of word coefficients repeated twice
+; (2 x 2 words = 8 bytes); each PD_* row is one dword repeated twice
+; (8 bytes).  "MF" in a name marks negated coefficients.
+PW_F0299_F0337 times 2 dw  F_0_299, F_0_337
+PW_F0114_F0250 times 2 dw  F_0_114, F_0_250
+PW_MF016_MF033 times 2 dw -F_0_168,-F_0_331
+PW_MF008_MF041 times 2 dw -F_0_081,-F_0_418
+; Bias added to the Cb/Cr sums before the final shift:
+; (one half - 1) in fixed point plus CENTERJSAMPLE moved into the integer part.
+PD_ONEHALFM1_CJ        times 2 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+; Rounding term (one half in fixed point) added to the Y sums.
+PD_ONEHALF     times 2 dd  (1 << (SCALEBITS-1))
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jpeg_rgb_ycc_convert_mmx (j_compress_ptr cinfo,
+;                           JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                           JDIMENSION output_row, int num_rows);
+;
+
+; Stack-argument accessors.  (b) is a register holding the value esp had
+; right after "push ebp": [b+0] is the saved ebp, [b+4] the return address,
+; so the first argument lives at b+8 and each following one 4 bytes higher.
+%define cinfo(b)       (b)+8           ; j_compress_ptr cinfo
+%define input_buf(b)   (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)  (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)  (b)+20          ; JDIMENSION output_row
+%define num_rows(b)    (b)+24          ; int num_rows
+
+; Local frame, addressed from the realigned ebp:
+;   [ebp+0]              pointer to the caller-side frame (original_ebp)
+;   wk(0)..wk(WK_NUM-1)  WK_NUM mmword scratch slots directly below ebp
+;   gotptr               one pointer-sized slot directly below wk(0)
+; wk() refers to WK_NUM before WK_NUM is defined; this relies on NASM
+; expanding %define bodies at the point of use.
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         8
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_rgb_ycc_convert_mmx)
+
+EXTN(jpeg_rgb_ycc_convert_mmx):
+       ; Prologue: build a frame whose ebp is aligned to SIZEOF_MMWORD so the
+       ; wk() scratch slots are aligned.  eax keeps the unaligned stack
+       ; pointer taken right after "push ebp"; the arguments are read
+       ; through it, and it is also stored at [ebp] for the epilogue.
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]
+       pushpic eax             ; make room for the GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; NOTE(review): pushpic/movpic/poppic/get_GOT/GOTOFF come from the
+       ; include files; presumably they only emit code in PIC builds --
+       ; confirm in jsimdext.inc.
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       ; ecx = image width in columns; nothing to do for a zero-width image.
+       mov     ecx, POINTER [cinfo(eax)]
+       mov     ecx, JDIMENSION [jcstruct_image_width(ecx)]     ; num_cols
+       test    ecx,ecx
+       jz      near .return
+
+       push    ecx
+
+       ; edi/ebx/edx = &output_buf[0..2][output_row]: positions in the three
+       ; per-component row-pointer arrays (ecx is borrowed for output_row).
+       mov     esi, JSAMPIMAGE [output_buf(eax)]
+       mov     ecx, JDIMENSION [output_row(eax)]
+       mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+       mov     ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
+       mov     edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
+       lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
+       lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+       lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
+
+       pop     ecx
+
+       ; esi = input row-pointer array, eax = row counter; num_rows <= 0
+       ; returns immediately (no MMX instruction has been executed yet, so
+       ; skipping emms on that path is fine).
+       mov     esi, JSAMPARRAY [input_buf(eax)]
+       mov     eax, INT [num_rows(eax)]
+       test    eax,eax
+       jle     near .return
+       alignx  16,7
+.rowloop:
+       ; Save the per-row state, then replace the row-pointer-array
+       ; positions with the actual sample pointers for this row.
+       pushpic eax
+       push    edx
+       push    ebx
+       push    edi
+       push    esi
+       push    ecx                     ; col
+
+       mov     esi, JSAMPROW [esi]     ; inptr
+       mov     edi, JSAMPROW [edi]     ; outptr0
+       mov     ebx, JSAMPROW [ebx]     ; outptr1
+       mov     edx, JSAMPROW [edx]     ; outptr2
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+       ; At least SIZEOF_MMWORD columns left: take the full-width path.
+       ; Otherwise fall through into the partial-group loader below.
+       cmp     ecx, byte SIZEOF_MMWORD
+       jae     short .columnloop
+       alignx  16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+       ; Partial group (fewer than SIZEOF_MMWORD columns): ecx becomes the
+       ; number of remaining input bytes, and those bytes are gathered from
+       ; the tail towards the head in pieces of 1, 2, 4, 8 and 16 bytes,
+       ; selected by the bits of the byte count, so only bytes that belong
+       ; to the row are read.  eax and edx are scratch here, hence the
+       ; push/pop around their use.
+.column_ld1:
+       push    eax
+       push    edx
+       lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+       test    cl, SIZEOF_BYTE
+       jz      short .column_ld2
+       sub     ecx, byte SIZEOF_BYTE
+       xor     eax,eax
+       mov     al, BYTE [esi+ecx]
+.column_ld2:
+       test    cl, SIZEOF_WORD
+       jz      short .column_ld4
+       sub     ecx, byte SIZEOF_WORD
+       xor     edx,edx
+       mov     dx, WORD [esi+ecx]
+       shl     eax, WORD_BIT
+       or      eax,edx
+.column_ld4:
+       movd    mmA,eax
+       pop     edx
+       pop     eax
+       test    cl, SIZEOF_DWORD
+       jz      short .column_ld8
+       sub     ecx, byte SIZEOF_DWORD
+       movd    mmG, DWORD [esi+ecx]
+       psllq   mmA, DWORD_BIT
+       por     mmA,mmG
+.column_ld8:
+       ; ecx is forced to SIZEOF_MMWORD on every exit from the loader so
+       ; that the subtraction at the bottom of the column loop leaves zero
+       ; and the row terminates after this group.
+       test    cl, SIZEOF_MMWORD
+       jz      short .column_ld16
+       movq    mmG,mmA
+       movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+       mov     ecx, SIZEOF_MMWORD
+       jmp     short .rgb_ycc_cnv
+.column_ld16:
+       test    cl, 2*SIZEOF_MMWORD
+       mov     ecx, SIZEOF_MMWORD      ; mov leaves the flags from test intact
+       jz      short .rgb_ycc_cnv
+       movq    mmF,mmA
+       movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+       movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+       jmp     short .rgb_ycc_cnv
+       alignx  16,7
+
+.columnloop:
+       movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+       movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+       movq    mmF, MMWORD [esi+2*SIZEOF_MMWORD]
+
+       ; De-interleave the packed 3-byte pixels.  In the register diagrams
+       ; each entry is <component index><column index>.
+.rgb_ycc_cnv:
+       ; mmA=(00 10 20 01 11 21 02 12)
+       ; mmG=(22 03 13 23 04 14 24 05)
+       ; mmF=(15 25 06 16 26 07 17 27)
+
+       movq      mmD,mmA
+       psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 10 20 01)
+       psrlq     mmD,4*BYTE_BIT        ; mmD=(11 21 02 12 -- -- -- --)
+
+       punpckhbw mmA,mmG               ; mmA=(00 04 10 14 20 24 01 05)
+       psllq     mmG,4*BYTE_BIT        ; mmG=(-- -- -- -- 22 03 13 23)
+
+       punpcklbw mmD,mmF               ; mmD=(11 15 21 25 02 06 12 16)
+       punpckhbw mmG,mmF               ; mmG=(22 26 03 07 13 17 23 27)
+
+       movq      mmE,mmA
+       psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 04 10 14)
+       psrlq     mmE,4*BYTE_BIT        ; mmE=(20 24 01 05 -- -- -- --)
+
+       punpckhbw mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+       psllq     mmD,4*BYTE_BIT        ; mmD=(-- -- -- -- 11 15 21 25)
+
+       punpcklbw mmE,mmG               ; mmE=(20 22 24 26 01 03 05 07)
+       punpckhbw mmD,mmG               ; mmD=(11 13 15 17 21 23 25 27)
+
+       ; Widen bytes to words by interleaving with zero.
+       pxor      mmH,mmH
+
+       movq      mmC,mmA
+       punpcklbw mmA,mmH               ; mmA=(00 02 04 06)
+       punpckhbw mmC,mmH               ; mmC=(10 12 14 16)
+
+       movq      mmB,mmE
+       punpcklbw mmE,mmH               ; mmE=(20 22 24 26)
+       punpckhbw mmB,mmH               ; mmB=(01 03 05 07)
+
+       movq      mmF,mmD
+       punpcklbw mmD,mmH               ; mmD=(11 13 15 17)
+       punpckhbw mmF,mmH               ; mmF=(21 23 25 27)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+       ; Partial group for 4-byte pixels: ecx stays a column count and the
+       ; remaining pixels are gathered tail-first in pieces of 1, 2 and 4
+       ; pixels selected by the bits of that count.
+.column_ld1:
+       test    cl, SIZEOF_MMWORD/8
+       jz      short .column_ld2
+       sub     ecx, byte SIZEOF_MMWORD/8
+       movd    mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+       test    cl, SIZEOF_MMWORD/4
+       jz      short .column_ld4
+       sub     ecx, byte SIZEOF_MMWORD/4
+       movq    mmF,mmA
+       movq    mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld4:
+       test    cl, SIZEOF_MMWORD/2
+       mov     ecx, SIZEOF_MMWORD      ; forces loop exit after this group
+       jz      short .rgb_ycc_cnv
+       movq    mmD,mmA
+       movq    mmC,mmF
+       movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+       movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+       jmp     short .rgb_ycc_cnv
+       alignx  16,7
+
+.columnloop:
+       movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+       movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+       movq    mmD, MMWORD [esi+2*SIZEOF_MMWORD]
+       movq    mmC, MMWORD [esi+3*SIZEOF_MMWORD]
+
+       ; De-interleave the packed 4-byte pixels.  In the register diagrams
+       ; each entry is <component index><column index>.
+.rgb_ycc_cnv:
+       ; mmA=(00 10 20 30 01 11 21 31)
+       ; mmF=(02 12 22 32 03 13 23 33)
+       ; mmD=(04 14 24 34 05 15 25 35)
+       ; mmC=(06 16 26 36 07 17 27 37)
+
+       movq      mmB,mmA
+       punpcklbw mmA,mmF               ; mmA=(00 02 10 12 20 22 30 32)
+       punpckhbw mmB,mmF               ; mmB=(01 03 11 13 21 23 31 33)
+
+       movq      mmG,mmD
+       punpcklbw mmD,mmC               ; mmD=(04 06 14 16 24 26 34 36)
+       punpckhbw mmG,mmC               ; mmG=(05 07 15 17 25 27 35 37)
+
+       movq      mmE,mmA
+       punpcklwd mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+       punpckhwd mmE,mmD               ; mmE=(20 22 24 26 30 32 34 36)
+
+       movq      mmH,mmB
+       punpcklwd mmB,mmG               ; mmB=(01 03 05 07 11 13 15 17)
+       punpckhwd mmH,mmG               ; mmH=(21 23 25 27 31 33 35 37)
+
+       ; Widen bytes to words by interleaving with zero.
+       pxor      mmF,mmF
+
+       movq      mmC,mmA
+       punpcklbw mmA,mmF               ; mmA=(00 02 04 06)
+       punpckhbw mmC,mmF               ; mmC=(10 12 14 16)
+
+       movq      mmD,mmB
+       punpcklbw mmB,mmF               ; mmB=(01 03 05 07)
+       punpckhbw mmD,mmF               ; mmD=(11 13 15 17)
+
+       movq      mmG,mmE
+       punpcklbw mmE,mmF               ; mmE=(20 22 24 26)
+       punpckhbw mmG,mmF               ; mmG=(30 32 34 36)
+
+       ; mmF is zero here, so the bytes of mmH land in the high byte of
+       ; each word; the word shift right brings them down to plain words.
+       punpcklbw mmF,mmH
+       punpckhbw mmH,mmH
+       psrlw     mmF,BYTE_BIT          ; mmF=(21 23 25 27)
+       psrlw     mmH,BYTE_BIT          ; mmH=(31 33 35 37)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+       ; NOTE(review): from here on the code names physical registers
+       ; mm0..mm7; which of mmA..mmH each one is appears to be decided in
+       ; the include files (presumably by the RGB component order) --
+       ; confirm in jcolsamp.inc.
+       ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
+       ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
+
+       ; (Original)
+       ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+       ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+       ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+       ;
+       ; (This implementation)
+       ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+       ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+       ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+
+       ; Spill the inputs that are needed again after their registers have
+       ; been reused.
+       movq      MMWORD [wk(0)], mm0   ; wk(0)=RE
+       movq      MMWORD [wk(1)], mm1   ; wk(1)=RO
+       movq      MMWORD [wk(2)], mm4   ; wk(2)=BE
+       movq      MMWORD [wk(3)], mm5   ; wk(3)=BO
+
+       ; ---- odd columns: (R,G) pairs -> partial Y and partial Cb ----
+       movq      mm6,mm1
+       punpcklwd mm1,mm3
+       punpckhwd mm6,mm3
+       movq      mm7,mm1
+       movq      mm4,mm6
+       pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+       pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+       pmaddwd   mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+       pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+
+       movq      MMWORD [wk(4)], mm1   ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
+       movq      MMWORD [wk(5)], mm6   ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+       ; B*FIX(0.5) without a multiply: unpacking B into the high word of
+       ; each dword gives B << 16, and one shift right leaves B << 15.
+       pxor      mm1,mm1
+       pxor      mm6,mm6
+       punpcklwd mm1,mm5               ; mm1=BOL
+       punpckhwd mm6,mm5               ; mm6=BOH
+       psrld     mm1,1                 ; mm1=BOL*FIX(0.500)
+       psrld     mm6,1                 ; mm6=BOH*FIX(0.500)
+
+       movq      mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ]
+
+       paddd     mm7,mm1
+       paddd     mm4,mm6
+       paddd     mm7,mm5
+       paddd     mm4,mm5
+       psrld     mm7,SCALEBITS         ; mm7=CbOL
+       psrld     mm4,SCALEBITS         ; mm4=CbOH
+       packssdw  mm7,mm4               ; mm7=CbO
+
+       movq      mm1, MMWORD [wk(2)]   ; mm1=BE
+
+       ; ---- even columns: (R,G) pairs -> partial Y and partial Cb ----
+       movq      mm6,mm0
+       punpcklwd mm0,mm2
+       punpckhwd mm6,mm2
+       movq      mm5,mm0
+       movq      mm4,mm6
+       pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
+       pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
+       pmaddwd   mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+       pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+
+       movq      MMWORD [wk(6)], mm0   ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
+       movq      MMWORD [wk(7)], mm6   ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+       pxor      mm0,mm0
+       pxor      mm6,mm6
+       punpcklwd mm0,mm1               ; mm0=BEL
+       punpckhwd mm6,mm1               ; mm6=BEH
+       psrld     mm0,1                 ; mm0=BEL*FIX(0.500)
+       psrld     mm6,1                 ; mm6=BEH*FIX(0.500)
+
+       movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
+
+       paddd     mm5,mm0
+       paddd     mm4,mm6
+       paddd     mm5,mm1
+       paddd     mm4,mm1
+       psrld     mm5,SCALEBITS         ; mm5=CbEL
+       psrld     mm4,SCALEBITS         ; mm4=CbEH
+       packssdw  mm5,mm4               ; mm5=CbE
+
+       ; Re-interleave: the even results stay in the low byte of each word
+       ; and the odd results are moved to the high byte, giving bytes in
+       ; column order.
+       ; NOTE(review): a full MMWORD is stored even for a partial final
+       ; group; assumes the output rows have room for it -- confirm.
+       psllw     mm7,BYTE_BIT
+       por       mm5,mm7               ; mm5=Cb
+       movq      MMWORD [ebx], mm5     ; Save Cb
+
+       movq      mm0, MMWORD [wk(3)]   ; mm0=BO
+       movq      mm6, MMWORD [wk(2)]   ; mm6=BE
+       movq      mm1, MMWORD [wk(1)]   ; mm1=RO
+
+       ; ---- odd columns: (B,G) pairs -> rest of Y and partial Cr ----
+       movq      mm4,mm0
+       punpcklwd mm0,mm3
+       punpckhwd mm4,mm3
+       movq      mm7,mm0
+       movq      mm5,mm4
+       pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+       pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+       pmaddwd   mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+       pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+
+       movq      mm3,[GOTOFF(eax,PD_ONEHALF)]  ; mm3=[PD_ONEHALF]
+
+       ; Y = (R,G) partial saved in wk(4)/wk(5) + (B,G) partial + rounding.
+       paddd     mm0, MMWORD [wk(4)]
+       paddd     mm4, MMWORD [wk(5)]
+       paddd     mm0,mm3
+       paddd     mm4,mm3
+       psrld     mm0,SCALEBITS         ; mm0=YOL
+       psrld     mm4,SCALEBITS         ; mm4=YOH
+       packssdw  mm0,mm4               ; mm0=YO
+
+       pxor      mm3,mm3
+       pxor      mm4,mm4
+       punpcklwd mm3,mm1               ; mm3=ROL
+       punpckhwd mm4,mm1               ; mm4=ROH
+       psrld     mm3,1                 ; mm3=ROL*FIX(0.500)
+       psrld     mm4,1                 ; mm4=ROH*FIX(0.500)
+
+       movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
+
+       paddd     mm7,mm3
+       paddd     mm5,mm4
+       paddd     mm7,mm1
+       paddd     mm5,mm1
+       psrld     mm7,SCALEBITS         ; mm7=CrOL
+       psrld     mm5,SCALEBITS         ; mm5=CrOH
+       packssdw  mm7,mm5               ; mm7=CrO
+
+       movq      mm3, MMWORD [wk(0)]   ; mm3=RE
+
+       ; ---- even columns: (B,G) pairs -> rest of Y and partial Cr ----
+       movq      mm4,mm6
+       punpcklwd mm6,mm2
+       punpckhwd mm4,mm2
+       movq      mm1,mm6
+       movq      mm5,mm4
+       pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+       pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+       pmaddwd   mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+       pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+
+       movq      mm2,[GOTOFF(eax,PD_ONEHALF)]  ; mm2=[PD_ONEHALF]
+
+       paddd     mm6, MMWORD [wk(6)]
+       paddd     mm4, MMWORD [wk(7)]
+       paddd     mm6,mm2
+       paddd     mm4,mm2
+       psrld     mm6,SCALEBITS         ; mm6=YEL
+       psrld     mm4,SCALEBITS         ; mm4=YEH
+       packssdw  mm6,mm4               ; mm6=YE
+
+       psllw     mm0,BYTE_BIT
+       por       mm6,mm0               ; mm6=Y
+       movq      MMWORD [edi], mm6     ; Save Y
+
+       pxor      mm2,mm2
+       pxor      mm4,mm4
+       punpcklwd mm2,mm3               ; mm2=REL
+       punpckhwd mm4,mm3               ; mm4=REH
+       psrld     mm2,1                 ; mm2=REL*FIX(0.500)
+       psrld     mm4,1                 ; mm4=REH*FIX(0.500)
+
+       movq      mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ]
+
+       paddd     mm1,mm2
+       paddd     mm5,mm4
+       paddd     mm1,mm0
+       paddd     mm5,mm0
+       psrld     mm1,SCALEBITS         ; mm1=CrEL
+       psrld     mm5,SCALEBITS         ; mm5=CrEH
+       packssdw  mm1,mm5               ; mm1=CrE
+
+       psllw     mm7,BYTE_BIT
+       por       mm1,mm7               ; mm1=Cr
+       movq      MMWORD [edx], mm1     ; Save Cr
+
+       ; Advance by one group of SIZEOF_MMWORD columns.  Loop while a full
+       ; group remains; a non-zero remainder goes through the partial
+       ; loader once more.
+       sub     ecx, byte SIZEOF_MMWORD
+       add     esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; inptr
+       add     edi, byte SIZEOF_MMWORD                 ; outptr0
+       add     ebx, byte SIZEOF_MMWORD                 ; outptr1
+       add     edx, byte SIZEOF_MMWORD                 ; outptr2
+       cmp     ecx, byte SIZEOF_MMWORD
+       jae     near .columnloop
+       test    ecx,ecx
+       jnz     near .column_ld1
+
+       ; Row done: restore the row-pointer-array positions and the column
+       ; count saved at .rowloop, then step every array to its next row.
+       pop     ecx                     ; col
+       pop     esi
+       pop     edi
+       pop     ebx
+       pop     edx
+       poppic  eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+       add     edi, byte SIZEOF_JSAMPROW
+       add     ebx, byte SIZEOF_JSAMPROW
+       add     edx, byte SIZEOF_JSAMPROW
+       dec     eax                             ; num_rows
+       jg      near .rowloop
+
+       emms            ; empty MMX state
+
+       ; Epilogue: [ebp] holds the stack pointer value saved in the
+       ; prologue, so "pop esp" switches back to the unaligned frame before
+       ; the caller's ebp is restored.
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JCCOLOR_RGBYCC_MMX_SUPPORTED
+%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
index 0a8a4b5d13c303c4d14cd5b4333adcbb3cfcc4a2..85f30835f3e142885c78a21d95eeb65c84a4e968 100644 (file)
--- a/jccolor.c
+++ b/jccolor.c
@@ -5,12 +5,20 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 5, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file contains input colorspace conversion routines.
  */
 
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jcolsamp.h"          /* Private declarations */
 
 
 /* Private subobject */
@@ -352,6 +360,7 @@ GLOBAL(void)
 jinit_color_converter (j_compress_ptr cinfo)
 {
   my_cconvert_ptr cconvert;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
 
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -420,8 +429,23 @@ jinit_color_converter (j_compress_ptr cinfo)
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     if (cinfo->in_color_space == JCS_RGB) {
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_ycc_convert;
+#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+#ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2 &&
+          IS_CONST_ALIGNED_16(jconst_rgb_ycc_convert_sse2)) {
+        cconvert->pub.color_convert = jpeg_rgb_ycc_convert_sse2;
+      } else
+#endif
+#ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED
+      if (simd & JSIMD_MMX) {
+        cconvert->pub.color_convert = jpeg_rgb_ycc_convert_mmx;
+      } else
+#endif
+#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */
+      {
+        cconvert->pub.start_pass = rgb_ycc_start;
+        cconvert->pub.color_convert = rgb_ycc_convert;
+      }
     } else if (cinfo->in_color_space == JCS_YCbCr)
       cconvert->pub.color_convert = null_convert;
     else
@@ -457,3 +481,28 @@ jinit_color_converter (j_compress_ptr cinfo)
     break;
   }
 }
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+/*
+ * Report which SIMD implementation jinit_color_converter() would pick for
+ * RGB->YCbCr conversion: JSIMD_SSE2, JSIMD_MMX or JSIMD_NONE.
+ * The candidates are tried weakest first so that a later, stronger match
+ * overrides an earlier one; SSE2 is only reported when its constant table
+ * passes the 16-byte alignment check.
+ */
+GLOBAL(unsigned int)
+jpeg_simd_color_converter (j_compress_ptr cinfo)
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
+  unsigned int mode = JSIMD_NONE;
+
+#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+#ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED
+  if ((simd & JSIMD_MMX) != 0) {
+    mode = JSIMD_MMX;
+  }
+#endif
+#ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED
+  if ((simd & JSIMD_SSE2) != 0 &&
+      IS_CONST_ALIGNED_16(jconst_rgb_ycc_convert_sse2)) {
+    mode = JSIMD_SSE2;          /* takes precedence over MMX */
+  }
+#endif
+#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */
+
+  return mode;
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
diff --git a/jccolss2.asm b/jccolss2.asm
new file mode 100644 (file)
index 0000000..1aabd89
--- /dev/null
@@ -0,0 +1,541 @@
+;
+; jccolss2.asm - colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+%ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED
+
+; --------------------------------------------------------------------------
+
+; Fixed-point coefficients: FIX(x) is x scaled by 2^SCALEBITS and rounded
+; to the nearest integer (e.g. 0.29900 * 65536 = 19595.26 -> 19595).
+%define SCALEBITS      16
+
+F_0_081        equ      5329                   ; FIX(0.08131)
+F_0_114        equ      7471                   ; FIX(0.11400)
+F_0_168        equ     11059                   ; FIX(0.16874)
+F_0_250        equ     16384                   ; FIX(0.25000)
+F_0_299        equ     19595                   ; FIX(0.29900)
+F_0_331        equ     21709                   ; FIX(0.33126)
+F_0_418        equ     27439                   ; FIX(0.41869)
+F_0_587        equ     38470                   ; FIX(0.58700)
+; F_0_587 (38470) does not fit in a signed 16-bit word (max 32767), so the
+; green weight is split into 0.337 + 0.250, each of which fits in the
+; word-sized table entries below.
+F_0_337        equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_rgb_ycc_convert_sse2)
+
+EXTN(jconst_rgb_ycc_convert_sse2):
+
+; Each PW_* row is a pair of word coefficients repeated four times
+; (4 x 2 words = 16 bytes); each PD_* row is one dword repeated four times
+; (16 bytes).  "MF" in a name marks negated coefficients.
+PW_F0299_F0337 times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250 times 4 dw  F_0_114, F_0_250
+PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331
+PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418
+; (one half - 1) in fixed point plus CENTERJSAMPLE moved into the integer part.
+PD_ONEHALFM1_CJ        times 4 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+; One half in fixed point (rounding term).
+PD_ONEHALF     times 4 dd  (1 << (SCALEBITS-1))
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jpeg_rgb_ycc_convert_sse2 (j_compress_ptr cinfo,
+;                            JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                            JDIMENSION output_row, int num_rows);
+;
+
+; Stack-argument accessors.  (b) is a register holding the value esp had
+; right after "push ebp": [b+0] is the saved ebp, [b+4] the return address,
+; so the first argument lives at b+8 and each following one 4 bytes higher.
+%define cinfo(b)       (b)+8           ; j_compress_ptr cinfo
+%define input_buf(b)   (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)  (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)  (b)+20          ; JDIMENSION output_row
+%define num_rows(b)    (b)+24          ; int num_rows
+
+; Local frame, addressed from the realigned ebp:
+;   [ebp+0]              pointer to the caller-side frame (original_ebp)
+;   wk(0)..wk(WK_NUM-1)  WK_NUM xmmword scratch slots directly below ebp
+;   gotptr               one pointer-sized slot directly below wk(0)
+; wk() refers to WK_NUM before WK_NUM is defined; this relies on NASM
+; expanding %define bodies at the point of use.
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         8
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_rgb_ycc_convert_sse2)
+
+EXTN(jpeg_rgb_ycc_convert_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]
+       pushpic eax             ; make a room for GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       mov     ecx, POINTER [cinfo(eax)]
+       mov     ecx, JDIMENSION [jcstruct_image_width(ecx)]     ; num_cols
+       test    ecx,ecx
+       jz      near .return
+
+       push    ecx
+
+       mov     esi, JSAMPIMAGE [output_buf(eax)]
+       mov     ecx, JDIMENSION [output_row(eax)]
+       mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+       mov     ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
+       mov     edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
+       lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
+       lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+       lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
+
+       pop     ecx
+
+       mov     esi, JSAMPARRAY [input_buf(eax)]
+       mov     eax, INT [num_rows(eax)]
+       test    eax,eax
+       jle     near .return
+       alignx  16,7
+.rowloop:
+       pushpic eax
+       push    edx
+       push    ebx
+       push    edi
+       push    esi
+       push    ecx                     ; col
+
+       mov     esi, JSAMPROW [esi]     ; inptr
+       mov     edi, JSAMPROW [edi]     ; outptr0
+       mov     ebx, JSAMPROW [ebx]     ; outptr1
+       mov     edx, JSAMPROW [edx]     ; outptr2
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jae     near .columnloop
+       alignx  16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+       push    eax
+       push    edx
+       lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+       test    cl, SIZEOF_BYTE
+       jz      short .column_ld2
+       sub     ecx, byte SIZEOF_BYTE
+       movzx   eax, BYTE [esi+ecx]
+.column_ld2:
+       test    cl, SIZEOF_WORD
+       jz      short .column_ld4
+       sub     ecx, byte SIZEOF_WORD
+       movzx   edx, WORD [esi+ecx]
+       shl     eax, WORD_BIT
+       or      eax,edx
+.column_ld4:
+       movd    xmmA,eax
+       pop     edx
+       pop     eax
+       test    cl, SIZEOF_DWORD
+       jz      short .column_ld8
+       sub     ecx, byte SIZEOF_DWORD
+       movd    xmmF, _DWORD [esi+ecx]
+       pslldq  xmmA, SIZEOF_DWORD
+       por     xmmA,xmmF
+.column_ld8:
+       test    cl, SIZEOF_MMWORD
+       jz      short .column_ld16
+       sub     ecx, byte SIZEOF_MMWORD
+       movq    xmmB, _MMWORD [esi+ecx]
+       pslldq  xmmA, SIZEOF_MMWORD
+       por     xmmA,xmmB
+.column_ld16:
+       test    cl, SIZEOF_XMMWORD
+       jz      short .column_ld32
+       movdqa  xmmF,xmmA
+       movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+       mov     ecx, SIZEOF_XMMWORD
+       jmp     short .rgb_ycc_cnv
+.column_ld32:
+       test    cl, 2*SIZEOF_XMMWORD
+       mov     ecx, SIZEOF_XMMWORD
+       jz      short .rgb_ycc_cnv
+       movdqa  xmmB,xmmA
+       movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+       movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+       jmp     short .rgb_ycc_cnv
+       alignx  16,7
+
+.columnloop:
+       movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+       movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+       movdqu  xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
+
+.rgb_ycc_cnv:
+       ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+       ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+       ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+       movdqa    xmmG,xmmA
+       pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+       psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+       punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+       pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+       punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+       punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+       movdqa    xmmD,xmmA
+       pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+       psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+       punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+       pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+       punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+       punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+       movdqa    xmmE,xmmA
+       pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+       psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+       punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+       pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+       punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+       punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+       pxor      xmmH,xmmH
+
+       movdqa    xmmC,xmmA
+       punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+       punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+       movdqa    xmmB,xmmE
+       punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+       punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+       movdqa    xmmF,xmmD
+       punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+       punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+       test    cl, SIZEOF_XMMWORD/16
+       jz      short .column_ld2
+       sub     ecx, byte SIZEOF_XMMWORD/16
+       movd    xmmA, _DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+       test    cl, SIZEOF_XMMWORD/8
+       jz      short .column_ld4
+       sub     ecx, byte SIZEOF_XMMWORD/8
+       movq    xmmE, _MMWORD [esi+ecx*RGB_PIXELSIZE]
+       pslldq  xmmA, SIZEOF_MMWORD
+       por     xmmA,xmmE
+.column_ld4:
+       test    cl, SIZEOF_XMMWORD/4
+       jz      short .column_ld8
+       sub     ecx, byte SIZEOF_XMMWORD/4
+       movdqa  xmmE,xmmA
+       movdqu  xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld8:
+       test    cl, SIZEOF_XMMWORD/2
+       mov     ecx, SIZEOF_XMMWORD
+       jz      short .rgb_ycc_cnv
+       movdqa  xmmF,xmmA
+       movdqa  xmmH,xmmE
+       movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+       movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+       jmp     short .rgb_ycc_cnv
+       alignx  16,7
+
+.columnloop:
+       movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+       movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+       movdqu  xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
+       movdqu  xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
+
+.rgb_ycc_cnv:
+       ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+       ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+       ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+       ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+       movdqa    xmmD,xmmA
+       punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+       punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+       movdqa    xmmC,xmmF
+       punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+       punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+       movdqa    xmmB,xmmA
+       punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+       punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+       movdqa    xmmG,xmmD
+       punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+       punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+       movdqa    xmmE,xmmA
+       punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+       punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+       movdqa    xmmH,xmmB
+       punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+       punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+       pxor      xmmF,xmmF
+
+       movdqa    xmmC,xmmA
+       punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+       punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+       movdqa    xmmD,xmmB
+       punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+       punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+       movdqa    xmmG,xmmE
+       punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+       punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+       punpcklbw xmmF,xmmH
+       punpckhbw xmmH,xmmH
+       psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+       psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+       ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+       ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+       ; (Original)
+       ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+       ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+       ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+       ;
+       ; (This implementation)
+       ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+       ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+       ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+
+       movdqa    XMMWORD [wk(0)], xmm0 ; wk(0)=RE
+       movdqa    XMMWORD [wk(1)], xmm1 ; wk(1)=RO
+       movdqa    XMMWORD [wk(2)], xmm4 ; wk(2)=BE
+       movdqa    XMMWORD [wk(3)], xmm5 ; wk(3)=BO
+
+       movdqa    xmm6,xmm1
+       punpcklwd xmm1,xmm3
+       punpckhwd xmm6,xmm3
+       movdqa    xmm7,xmm1
+       movdqa    xmm4,xmm6
+       pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+       pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+       pmaddwd   xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+       pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+
+       movdqa    XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
+       movdqa    XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+       pxor      xmm1,xmm1
+       pxor      xmm6,xmm6
+       punpcklwd xmm1,xmm5             ; xmm1=BOL
+       punpckhwd xmm6,xmm5             ; xmm6=BOH
+       psrld     xmm1,1                ; xmm1=BOL*FIX(0.500)
+       psrld     xmm6,1                ; xmm6=BOH*FIX(0.500)
+
+       movdqa    xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ]
+
+       paddd     xmm7,xmm1
+       paddd     xmm4,xmm6
+       paddd     xmm7,xmm5
+       paddd     xmm4,xmm5
+       psrld     xmm7,SCALEBITS        ; xmm7=CbOL
+       psrld     xmm4,SCALEBITS        ; xmm4=CbOH
+       packssdw  xmm7,xmm4             ; xmm7=CbO
+
+       movdqa    xmm1, XMMWORD [wk(2)] ; xmm1=BE
+
+       movdqa    xmm6,xmm0
+       punpcklwd xmm0,xmm2
+       punpckhwd xmm6,xmm2
+       movdqa    xmm5,xmm0
+       movdqa    xmm4,xmm6
+       pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+       pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+       pmaddwd   xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+       pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+
+       movdqa    XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
+       movdqa    XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+       pxor      xmm0,xmm0
+       pxor      xmm6,xmm6
+       punpcklwd xmm0,xmm1             ; xmm0=BEL
+       punpckhwd xmm6,xmm1             ; xmm6=BEH
+       psrld     xmm0,1                ; xmm0=BEL*FIX(0.500)
+       psrld     xmm6,1                ; xmm6=BEH*FIX(0.500)
+
+       movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
+
+       paddd     xmm5,xmm0
+       paddd     xmm4,xmm6
+       paddd     xmm5,xmm1
+       paddd     xmm4,xmm1
+       psrld     xmm5,SCALEBITS        ; xmm5=CbEL
+       psrld     xmm4,SCALEBITS        ; xmm4=CbEH
+       packssdw  xmm5,xmm4             ; xmm5=CbE
+
+       psllw     xmm7,BYTE_BIT
+       por       xmm5,xmm7             ; xmm5=Cb
+       movdqa    XMMWORD [ebx], xmm5   ; Save Cb
+
+       movdqa    xmm0, XMMWORD [wk(3)] ; xmm0=BO
+       movdqa    xmm6, XMMWORD [wk(2)] ; xmm6=BE
+       movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=RO
+
+       movdqa    xmm4,xmm0
+       punpcklwd xmm0,xmm3
+       punpckhwd xmm4,xmm3
+       movdqa    xmm7,xmm0
+       movdqa    xmm5,xmm4
+       pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+       pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+       pmaddwd   xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+       pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+
+       movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
+
+       paddd     xmm0, XMMWORD [wk(4)]
+       paddd     xmm4, XMMWORD [wk(5)]
+       paddd     xmm0,xmm3
+       paddd     xmm4,xmm3
+       psrld     xmm0,SCALEBITS        ; xmm0=YOL
+       psrld     xmm4,SCALEBITS        ; xmm4=YOH
+       packssdw  xmm0,xmm4             ; xmm0=YO
+
+       pxor      xmm3,xmm3
+       pxor      xmm4,xmm4
+       punpcklwd xmm3,xmm1             ; xmm3=ROL
+       punpckhwd xmm4,xmm1             ; xmm4=ROH
+       psrld     xmm3,1                ; xmm3=ROL*FIX(0.500)
+       psrld     xmm4,1                ; xmm4=ROH*FIX(0.500)
+
+       movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
+
+       paddd     xmm7,xmm3
+       paddd     xmm5,xmm4
+       paddd     xmm7,xmm1
+       paddd     xmm5,xmm1
+       psrld     xmm7,SCALEBITS        ; xmm7=CrOL
+       psrld     xmm5,SCALEBITS        ; xmm5=CrOH
+       packssdw  xmm7,xmm5             ; xmm7=CrO
+
+       movdqa    xmm3, XMMWORD [wk(0)] ; xmm3=RE
+
+       movdqa    xmm4,xmm6
+       punpcklwd xmm6,xmm2
+       punpckhwd xmm4,xmm2
+       movdqa    xmm1,xmm6
+       movdqa    xmm5,xmm4
+       pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+       pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+       pmaddwd   xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+       pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+
+       movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
+
+       paddd     xmm6, XMMWORD [wk(6)]
+       paddd     xmm4, XMMWORD [wk(7)]
+       paddd     xmm6,xmm2
+       paddd     xmm4,xmm2
+       psrld     xmm6,SCALEBITS        ; xmm6=YEL
+       psrld     xmm4,SCALEBITS        ; xmm4=YEH
+       packssdw  xmm6,xmm4             ; xmm6=YE
+
+       psllw     xmm0,BYTE_BIT
+       por       xmm6,xmm0             ; xmm6=Y
+       movdqa    XMMWORD [edi], xmm6   ; Save Y
+
+       pxor      xmm2,xmm2
+       pxor      xmm4,xmm4
+       punpcklwd xmm2,xmm3             ; xmm2=REL
+       punpckhwd xmm4,xmm3             ; xmm4=REH
+       psrld     xmm2,1                ; xmm2=REL*FIX(0.500)
+       psrld     xmm4,1                ; xmm4=REH*FIX(0.500)
+
+       movdqa    xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ]
+
+       paddd     xmm1,xmm2
+       paddd     xmm5,xmm4
+       paddd     xmm1,xmm0
+       paddd     xmm5,xmm0
+       psrld     xmm1,SCALEBITS        ; xmm1=CrEL
+       psrld     xmm5,SCALEBITS        ; xmm5=CrEH
+       packssdw  xmm1,xmm5             ; xmm1=CrE
+
+       psllw     xmm7,BYTE_BIT
+       por       xmm1,xmm7             ; xmm1=Cr
+       movdqa    XMMWORD [edx], xmm1   ; Save Cr
+
+       sub     ecx, byte SIZEOF_XMMWORD
+       add     esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+       add     edi, byte SIZEOF_XMMWORD                ; outptr0
+       add     ebx, byte SIZEOF_XMMWORD                ; outptr1
+       add     edx, byte SIZEOF_XMMWORD                ; outptr2
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jae     near .columnloop
+       test    ecx,ecx
+       jnz     near .column_ld1
+
+       pop     ecx                     ; col
+       pop     esi
+       pop     edi
+       pop     ebx
+       pop     edx
+       poppic  eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+       add     edi, byte SIZEOF_JSAMPROW
+       add     ebx, byte SIZEOF_JSAMPROW
+       add     edx, byte SIZEOF_JSAMPROW
+       dec     eax                             ; num_rows
+       jg      near .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JCCOLOR_RGBYCC_SSE2_SUPPORTED
+%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
index 61fa79b9e68bcc3e3bfe84b9a50af55a9598214b..3a89eb4a86dd13c590b1386c171979f154e7bf2c 100644 (file)
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : December 24, 2005
+ * ---------------------------------------------------------------------
+ *
  * This file contains the forward-DCT management logic.
  * This code selects a particular DCT implementation to be used,
  * and it performs related housekeeping chores including coefficient
@@ -24,6 +31,8 @@ typedef struct {
 
   /* Pointer to the DCT routine actually in use */
   forward_DCT_method_ptr do_dct;
+  convsamp_int_method_ptr convsamp;
+  quantize_int_method_ptr quantize;
 
   /* The actual post-DCT divisors --- not identical to the quant table
    * entries, because of scaling (especially for an unnormalized DCT).
@@ -34,12 +43,75 @@ typedef struct {
 #ifdef DCT_FLOAT_SUPPORTED
   /* Same as above for the floating-point case. */
   float_DCT_method_ptr do_float_dct;
+  convsamp_float_method_ptr float_convsamp;
+  quantize_float_method_ptr float_quantize;
   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
 #endif
 } my_fdct_controller;
 
 typedef my_fdct_controller * my_fdct_ptr;
 
+/*
+ * SIMD Ext: Most SSE/SSE2 instructions require their memory operands to be
+ * aligned on a 16-byte boundary; if they are not, a general-protection
+ * exception (#GP) is generated.
+ */
+
+#define ALIGN_SIZE     16              /* sizeof SSE/SSE2 register, in bytes */
+#define ALIGN_MEM(p,a) ((void *) (((size_t) (p) + (a) - 1) & -(a)))    /* round address p up to the next multiple of a; the -(a) mask only works when a is a power of 2 */
+
+#ifdef JFDCT_INT_QUANTIZE_WITH_DIVISION        /* build option: quantize by division instead of by reciprocal */
+#undef jpeg_quantize_int       /* drop any earlier macro definitions of the three quantizer names ... */
+#undef jpeg_quantize_int_mmx
+#undef jpeg_quantize_int_sse2
+#define jpeg_quantize_int       jpeg_quantize_idiv     /* ... and make all three resolve to jpeg_quantize_idiv */
+#define jpeg_quantize_int_mmx   jpeg_quantize_idiv
+#define jpeg_quantize_int_sse2  jpeg_quantize_idiv
+#endif
+
+
+#ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+/*
+ * SIMD Ext: compute the reciprocal of the divisor
+ *
+ * This implementation is based on an algorithm described in
+ *   "How to optimize for the Pentium family of microprocessors"
+ *   (http://www.agner.org/assem/).
+ */
+
+LOCAL(void)
+compute_reciprocal (DCTELEM divisor, DCTELEM * dtbl)   /* derive four table entries (DCTSIZE2 apart, starting at dtbl) from one divisor */
+{
+  unsigned long d = ((unsigned long) divisor) & 0x0000FFFF;    /* only the low 16 bits of divisor are used */
+  unsigned long fq, fr;        /* quotient and remainder of 2^r / d */
+  int b, r, c;                 /* bit count of d, shift amount, correction term */
+  /* NOTE(review): d == 0 would divide by zero below; presumably callers never pass such a divisor -- confirm */
+  for (b = 0; (1UL << b) <= d; b++) ;  /* b = number of significant bits in d */
+  /* --b below turns b into the index of d's highest set bit */
+  r  = 16 + (--b);
+  fq = (1UL << r) / d;
+  fr = (1UL << r) % d;
+  r -= 16;                     /* r is now just the highest-set-bit index of d */
+  c  = 0;
+  /* pick how the truncated quotient is adjusted, based on the remainder */
+  if (fr == 0) {               /* 2^r is an exact multiple of d */
+    fq >>= 1;                  /* halve the reciprocal ... */
+    r--;                       /* ... and use one bit less of shift to compensate */
+  } else if (fr <= (d / 2)) {  /* remainder is at most half of d: keep fq as truncated */
+    c++;                       /* and add 1 to the correction term instead */
+  } else {                     /* remainder is more than half of d */
+    fq++;                      /* round the reciprocal up */
+  }
+  /* the four results are stored DCTSIZE2 elements apart, i.e. as four parallel sub-tables */
+  dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;           /* reciprocal */
+  dtbl[DCTSIZE2 * 1] = (DCTELEM) (c + (d / 2));        /* correction + roundfactor */
+  dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (16 - (r + 1 + 1)));    /* scale */
+  dtbl[DCTSIZE2 * 3] = (DCTELEM) (r + 1);                      /* shift */
+}
+
+#endif /* JFDCT_INT_QUANTIZE_WITH_DIVISION */
+
 
 /*
  * Initialize for a processing pass.
@@ -75,6 +147,18 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
       /* For LL&M IDCT method, divisors are equal to raw quantization
        * coefficients multiplied by 8 (to counteract scaling).
        */
+#ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+      if (fdct->divisors[qtblno] == NULL) {
+       fdct->divisors[qtblno] = (DCTELEM *)
+         (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                     (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
+      }
+      dtbl = fdct->divisors[qtblno];
+      for (i = 0; i < DCTSIZE2; i++) {
+       compute_reciprocal ((DCTELEM) (qtbl->quantval[i] << 3), &dtbl[i]);
+      }
+      break;
+#else  /* JFDCT_INT_QUANTIZE_WITH_DIVISION */
       if (fdct->divisors[qtblno] == NULL) {
        fdct->divisors[qtblno] = (DCTELEM *)
          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -85,7 +169,8 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
        dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
       }
       break;
-#endif
+#endif /* JFDCT_INT_QUANTIZE_WITH_DIVISION */
+#endif /* DCT_ISLOW_SUPPORTED */
 #ifdef DCT_IFAST_SUPPORTED
     case JDCT_IFAST:
       {
@@ -109,6 +194,21 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
        };
        SHIFT_TEMPS
 
+#ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+       if (fdct->divisors[qtblno] == NULL) {
+         fdct->divisors[qtblno] = (DCTELEM *)
+           (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                       (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
+       }
+       dtbl = fdct->divisors[qtblno];
+       for (i = 0; i < DCTSIZE2; i++) {
+         compute_reciprocal ((DCTELEM)
+                              DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+                                                    (INT32) aanscales[i]),
+                                      CONST_BITS-3),
+                             &dtbl[i]);
+       }
+#else  /* JFDCT_INT_QUANTIZE_WITH_DIVISION */
        if (fdct->divisors[qtblno] == NULL) {
          fdct->divisors[qtblno] = (DCTELEM *)
            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -121,9 +221,10 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
                                  (INT32) aanscales[i]),
                    CONST_BITS-3);
        }
+#endif /* JFDCT_INT_QUANTIZE_WITH_DIVISION */
       }
       break;
-#endif
+#endif /* DCT_IFAST_SUPPORTED */
 #ifdef DCT_FLOAT_SUPPORTED
     case JDCT_FLOAT:
       {
@@ -183,83 +284,23 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
             JDIMENSION num_blocks)
 /* This version is used for integer DCT implementations. */
 {
-  /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  forward_DCT_method_ptr do_dct = fdct->do_dct;
   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
-  DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  DCTELEM workspace[DCTSIZE2 + ALIGN_SIZE/sizeof(DCTELEM)];
+  DCTELEM * wkptr = (DCTELEM *) ALIGN_MEM(workspace, ALIGN_SIZE);
   JDIMENSION bi;
 
   sample_data += start_row;    /* fold in the vertical offset once */
 
   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     /* Load data into workspace, applying unsigned->signed conversion */
-    { register DCTELEM *workspaceptr;
-      register JSAMPROW elemptr;
-      register int elemr;
-
-      workspaceptr = workspace;
-      for (elemr = 0; elemr < DCTSIZE; elemr++) {
-       elemptr = sample_data[elemr] + start_col;
-#if DCTSIZE == 8               /* unroll the inner loop */
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-       *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-#else
-       { register int elemc;
-         for (elemc = DCTSIZE; elemc > 0; elemc--) {
-           *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-         }
-       }
-#endif
-      }
-    }
+    (*fdct->convsamp) (sample_data, start_col, wkptr);
 
     /* Perform the DCT */
-    (*do_dct) (workspace);
+    (*fdct->do_dct) (wkptr);
 
     /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register DCTELEM temp, qval;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-       qval = divisors[i];
-       temp = workspace[i];
-       /* Divide the coefficient value by qval, ensuring proper rounding.
-        * Since C does not specify the direction of rounding for negative
-        * quotients, we have to force the dividend positive for portability.
-        *
-        * In most files, at least half of the output values will be zero
-        * (at default quantization settings, more like three-quarters...)
-        * so we should ensure that this case is fast.  On many machines,
-        * a comparison is enough cheaper than a divide to make a special test
-        * a win.  Since both inputs will be nonnegative, we need only test
-        * for a < b to discover whether a/b is 0.
-        * If your machine's division is fast enough, define FAST_DIVIDE.
-        */
-#ifdef FAST_DIVIDE
-#define DIVIDE_BY(a,b) a /= b
-#else
-#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
-#endif
-       if (temp < 0) {
-         temp = -temp;
-         temp += qval>>1;      /* for rounding */
-         DIVIDE_BY(temp, qval);
-         temp = -temp;
-       } else {
-         temp += qval>>1;      /* for rounding */
-         DIVIDE_BY(temp, qval);
-       }
-       output_ptr[i] = (JCOEF) temp;
-      }
-    }
+    (*fdct->quantize) (coef_blocks[bi], divisors, wkptr);
   }
 }
 
@@ -273,64 +314,23 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
                   JDIMENSION num_blocks)
 /* This version is used for floating-point DCT implementations. */
 {
-  /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  float_DCT_method_ptr do_dct = fdct->do_float_dct;
   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
-  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  FAST_FLOAT workspace[DCTSIZE2 + ALIGN_SIZE/sizeof(FAST_FLOAT)];
+  FAST_FLOAT * wkptr = (FAST_FLOAT *) ALIGN_MEM(workspace, ALIGN_SIZE);
   JDIMENSION bi;
 
   sample_data += start_row;    /* fold in the vertical offset once */
 
   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     /* Load data into workspace, applying unsigned->signed conversion */
-    { register FAST_FLOAT *workspaceptr;
-      register JSAMPROW elemptr;
-      register int elemr;
-
-      workspaceptr = workspace;
-      for (elemr = 0; elemr < DCTSIZE; elemr++) {
-       elemptr = sample_data[elemr] + start_col;
-#if DCTSIZE == 8               /* unroll the inner loop */
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-       *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-#else
-       { register int elemc;
-         for (elemc = DCTSIZE; elemc > 0; elemc--) {
-           *workspaceptr++ = (FAST_FLOAT)
-             (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-         }
-       }
-#endif
-      }
-    }
+    (*fdct->float_convsamp) (sample_data, start_col, wkptr);
 
     /* Perform the DCT */
-    (*do_dct) (workspace);
+    (*fdct->do_float_dct) (wkptr);
 
     /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register FAST_FLOAT temp;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-       /* Apply the quantization and scaling factor */
-       temp = workspace[i] * divisors[i];
-       /* Round to nearest integer.
-        * Since C does not specify the direction of rounding for negative
-        * quotients, we have to force the dividend positive for portability.
-        * The maximum coefficient size is +-16K (for 12-bit data), so this
-        * code should work for either 16-bit or 32-bit ints.
-        */
-       output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
-      }
-    }
+    (*fdct->float_quantize) (coef_blocks[bi], divisors, wkptr);
   }
 }
 
@@ -346,6 +346,7 @@ jinit_forward_dct (j_compress_ptr cinfo)
 {
   my_fdct_ptr fdct;
   int i;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
 
   fdct = (my_fdct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -357,21 +358,86 @@ jinit_forward_dct (j_compress_ptr cinfo)
 #ifdef DCT_ISLOW_SUPPORTED
   case JDCT_ISLOW:
     fdct->pub.forward_DCT = forward_DCT;
-    fdct->do_dct = jpeg_fdct_islow;
-    break;
+#ifdef JFDCT_INT_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_fdct_islow_sse2)) {
+      fdct->do_dct = jpeg_fdct_islow_sse2;
+      fdct->convsamp = jpeg_convsamp_int_sse2;
+      fdct->quantize = jpeg_quantize_int_sse2;
+    } else
+#endif
+#ifdef JFDCT_INT_MMX_SUPPORTED
+    if (simd & JSIMD_MMX) {
+      fdct->do_dct = jpeg_fdct_islow_mmx;
+      fdct->convsamp = jpeg_convsamp_int_mmx;
+      fdct->quantize = jpeg_quantize_int_mmx;
+    } else
 #endif
+    {
+      fdct->do_dct = jpeg_fdct_islow;
+      fdct->convsamp = jpeg_convsamp_int;
+      fdct->quantize = jpeg_quantize_int;
+    }
+    break;
+#endif /* DCT_ISLOW_SUPPORTED */
 #ifdef DCT_IFAST_SUPPORTED
   case JDCT_IFAST:
     fdct->pub.forward_DCT = forward_DCT;
-    fdct->do_dct = jpeg_fdct_ifast;
-    break;
+#ifdef JFDCT_INT_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_fdct_ifast_sse2)) {
+      fdct->do_dct = jpeg_fdct_ifast_sse2;
+      fdct->convsamp = jpeg_convsamp_int_sse2;
+      fdct->quantize = jpeg_quantize_int_sse2;
+    } else
 #endif
+#ifdef JFDCT_INT_MMX_SUPPORTED
+    if (simd & JSIMD_MMX) {
+      fdct->do_dct = jpeg_fdct_ifast_mmx;
+      fdct->convsamp = jpeg_convsamp_int_mmx;
+      fdct->quantize = jpeg_quantize_int_mmx;
+    } else
+#endif
+    {
+      fdct->do_dct = jpeg_fdct_ifast;
+      fdct->convsamp = jpeg_convsamp_int;
+      fdct->quantize = jpeg_quantize_int;
+    }
+    break;
+#endif /* DCT_IFAST_SUPPORTED */
 #ifdef DCT_FLOAT_SUPPORTED
   case JDCT_FLOAT:
     fdct->pub.forward_DCT = forward_DCT_float;
-    fdct->do_float_dct = jpeg_fdct_float;
-    break;
+#ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE && simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_fdct_float_sse)) {
+      fdct->do_float_dct = jpeg_fdct_float_sse;
+      fdct->float_convsamp = jpeg_convsamp_flt_sse2;
+      fdct->float_quantize = jpeg_quantize_flt_sse2;
+    } else
+#endif
+#ifdef JFDCT_FLT_SSE_MMX_SUPPORTED
+    if (simd & JSIMD_SSE &&
+        IS_CONST_ALIGNED_16(jconst_fdct_float_sse)) {
+      fdct->do_float_dct = jpeg_fdct_float_sse;
+      fdct->float_convsamp = jpeg_convsamp_flt_sse;
+      fdct->float_quantize = jpeg_quantize_flt_sse;
+    } else
+#endif
+#ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED
+    if (simd & JSIMD_3DNOW) {
+      fdct->do_float_dct = jpeg_fdct_float_3dnow;
+      fdct->float_convsamp = jpeg_convsamp_flt_3dnow;
+      fdct->float_quantize = jpeg_quantize_flt_3dnow;
+    } else
 #endif
+    {
+      fdct->do_float_dct = jpeg_fdct_float;
+      fdct->float_convsamp = jpeg_convsamp_float;
+      fdct->float_quantize = jpeg_quantize_float;
+    }
+    break;
+#endif /* DCT_FLOAT_SUPPORTED */
   default:
     ERREXIT(cinfo, JERR_NOT_COMPILED);
     break;
@@ -385,3 +451,65 @@ jinit_forward_dct (j_compress_ptr cinfo)
 #endif
   }
 }
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+GLOBAL(unsigned int)
+jpeg_simd_forward_dct (j_compress_ptr cinfo, int method)       /* report which JSIMD_* variant the forward DCT would use for `method' */
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); /* bitmask tested against the JSIMD_* flags below */
+  /* Each case applies the same tests, in the same order, as the matching case in jinit_forward_dct. */
+  switch (method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+#ifdef JFDCT_INT_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_fdct_islow_sse2))   /* presumably checks the table is 16-byte aligned -- confirm */
+      return JSIMD_SSE2;
+#endif
+#ifdef JFDCT_INT_MMX_SUPPORTED
+    if (simd & JSIMD_MMX)
+      return JSIMD_MMX;
+#endif
+    return JSIMD_NONE;         /* no SIMD variant matched for this method */
+#endif /* DCT_ISLOW_SUPPORTED */
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+#ifdef JFDCT_INT_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_fdct_ifast_sse2))
+      return JSIMD_SSE2;
+#endif
+#ifdef JFDCT_INT_MMX_SUPPORTED
+    if (simd & JSIMD_MMX)
+      return JSIMD_MMX;
+#endif
+    return JSIMD_NONE;         /* no SIMD variant matched for this method */
+#endif /* DCT_IFAST_SUPPORTED */
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+#ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE && simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_fdct_float_sse))
+      return JSIMD_SSE;                /* only SSE is reported; full set would be (JSIMD_SSE | JSIMD_SSE2); */
+#endif
+#ifdef JFDCT_FLT_SSE_MMX_SUPPORTED
+    if (simd & JSIMD_SSE &&
+        IS_CONST_ALIGNED_16(jconst_fdct_float_sse))
+      return JSIMD_SSE;                /* only SSE is reported; full set would be (JSIMD_SSE | JSIMD_MMX); */
+#endif
+#ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED
+    if (simd & JSIMD_3DNOW)
+      return JSIMD_3DNOW;      /* only 3DNow! is reported; full set would be (JSIMD_3DNOW | JSIMD_MMX); */
+#endif
+    return JSIMD_NONE;         /* no SIMD variant matched for this method */
+#endif /* DCT_FLOAT_SUPPORTED */
+  default:                     /* method has no compiled-in case: fall out of the switch */
+    ;
+  }
+
+  return JSIMD_NONE;   /* not compiled */
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
diff --git a/jcolsamp.h b/jcolsamp.h
new file mode 100644 (file)
index 0000000..2a27b53
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * jcolsamp.h - private declarations for color conversion & up/downsampling
+ *
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * Last Modified : February 4, 2006
+ *
+ * [TAB8]
+ */
+
+
+/* Configuration check: BITS_IN_JSAMPLE == 8 (8-bit sample values) is the only
+ * setting supported by this SIMD extension.
+ */
+#if BITS_IN_JSAMPLE != 8
+#error "Sorry, this SIMD code only copes with 8-bit sample values."
+#endif
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_rgb_ycc_convert_mmx       jMRgbYccCnv     /* jccolmmx.asm */
+#define jpeg_rgb_ycc_convert_sse2      jSRgbYccCnv     /* jccolss2.asm */
+#define jpeg_h2v1_downsample_mmx       jM21Downsample  /* jcsammmx.asm */
+#define jpeg_h2v2_downsample_mmx       jM22Downsample  /* jcsammmx.asm */
+#define jpeg_h2v1_downsample_sse2      jS21Downsample  /* jcsamss2.asm */
+#define jpeg_h2v2_downsample_sse2      jS22Downsample  /* jcsamss2.asm */
+#define jpeg_ycc_rgb_convert_mmx       jMYccRgbCnv     /* jdcolmmx.asm */
+#define jpeg_ycc_rgb_convert_sse2      jSYccRgbCnv     /* jdcolss2.asm */
+#define jpeg_h2v1_merged_upsample_mmx  jM21MerUpsample /* jdmermmx.asm */
+#define jpeg_h2v2_merged_upsample_mmx  jM22MerUpsample /* jdmermmx.asm */
+#define jpeg_h2v1_merged_upsample_sse2 jS21MerUpsample /* jdmerss2.asm */
+#define jpeg_h2v2_merged_upsample_sse2 jS22MerUpsample /* jdmerss2.asm */
+#define jpeg_h2v1_fancy_upsample_mmx   jM21FanUpsample /* jdsammmx.asm */
+#define jpeg_h2v2_fancy_upsample_mmx   jM22FanUpsample /* jdsammmx.asm */
+#define jpeg_h1v2_fancy_upsample_mmx   jM12FanUpsample /* jdsammmx.asm */
+#define jpeg_h2v1_upsample_mmx         jM21Upsample    /* jdsammmx.asm */
+#define jpeg_h2v2_upsample_mmx         jM22Upsample    /* jdsammmx.asm */
+#define jpeg_h2v1_fancy_upsample_sse2  jS21FanUpsample /* jdsamss2.asm */
+#define jpeg_h2v2_fancy_upsample_sse2  jS22FanUpsample /* jdsamss2.asm */
+#define jpeg_h1v2_fancy_upsample_sse2  jS12FanUpsample /* jdsamss2.asm */
+#define jpeg_h2v1_upsample_sse2                jS21Upsample    /* jdsamss2.asm */
+#define jpeg_h2v2_upsample_sse2                jS22Upsample    /* jdsamss2.asm */
+#define jconst_rgb_ycc_convert_mmx     jMCRgbYccCnv    /* jccolmmx.asm */
+#define jconst_rgb_ycc_convert_sse2    jSCRgbYccCnv    /* jccolss2.asm */
+#define jconst_ycc_rgb_convert_mmx     jMCYccRgbCnv    /* jdcolmmx.asm */
+#define jconst_ycc_rgb_convert_sse2    jSCYccRgbCnv    /* jdcolss2.asm */
+#define jconst_merged_upsample_mmx     jMCMerUpsample  /* jdmermmx.asm */
+#define jconst_merged_upsample_sse2    jSCMerUpsample  /* jdmerss2.asm */
+#define jconst_fancy_upsample_mmx      jMCFanUpsample  /* jdsammmx.asm */
+#define jconst_fancy_upsample_sse2     jSCFanUpsample  /* jdsamss2.asm */
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+#define jpeg_simd_merged_upsampler     jSiMUpsampler   /* jdmerge.c    */
+#endif
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Extern declarations for color conversion & up/downsampling routines. */
+
+EXTERN(void) jpeg_rgb_ycc_convert_mmx
+    JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+        JDIMENSION output_row, int num_rows));  /* jccolmmx.asm */
+EXTERN(void) jpeg_rgb_ycc_convert_sse2
+    JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+        JDIMENSION output_row, int num_rows));  /* jccolss2.asm */
+
+EXTERN(void) jpeg_h2v1_downsample_mmx
+    JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY output_data));        /* jcsammmx.asm */
+EXTERN(void) jpeg_h2v2_downsample_mmx
+    JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY output_data));        /* jcsammmx.asm */
+EXTERN(void) jpeg_h2v1_downsample_sse2
+    JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY output_data));        /* jcsamss2.asm */
+EXTERN(void) jpeg_h2v2_downsample_sse2
+    JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY output_data));        /* jcsamss2.asm */
+
+EXTERN(void) jpeg_ycc_rgb_convert_mmx
+    JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
+        JSAMPARRAY output_buf, int num_rows));  /* jdcolmmx.asm */
+EXTERN(void) jpeg_ycc_rgb_convert_sse2
+    JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
+        JSAMPARRAY output_buf, int num_rows));  /* jdcolss2.asm */
+
+EXTERN(void) jpeg_h2v1_merged_upsample_mmx
+    JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+        JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));   /* jdmermmx.asm */
+EXTERN(void) jpeg_h2v2_merged_upsample_mmx
+    JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+        JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));   /* jdmermmx.asm */
+EXTERN(void) jpeg_h2v1_merged_upsample_sse2
+    JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+        JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));   /* jdmerss2.asm */
+EXTERN(void) jpeg_h2v2_merged_upsample_sse2
+    JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+        JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));   /* jdmerss2.asm */
+
+EXTERN(void) jpeg_h2v1_fancy_upsample_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsammmx.asm */
+EXTERN(void) jpeg_h2v2_fancy_upsample_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsammmx.asm */
+EXTERN(void) jpeg_h1v2_fancy_upsample_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsammmx.asm */
+EXTERN(void) jpeg_h2v1_upsample_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsammmx.asm */
+EXTERN(void) jpeg_h2v2_upsample_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsammmx.asm */
+EXTERN(void) jpeg_h2v1_fancy_upsample_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsamss2.asm */
+EXTERN(void) jpeg_h2v2_fancy_upsample_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsamss2.asm */
+EXTERN(void) jpeg_h1v2_fancy_upsample_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsamss2.asm */
+EXTERN(void) jpeg_h2v1_upsample_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsamss2.asm */
+EXTERN(void) jpeg_h2v2_upsample_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));  /* jdsamss2.asm */
+
+extern const int jconst_rgb_ycc_convert_mmx[];         /* jccolmmx.asm */
+extern const int jconst_rgb_ycc_convert_sse2[];                /* jccolss2.asm */
+extern const int jconst_ycc_rgb_convert_mmx[];         /* jdcolmmx.asm */
+extern const int jconst_ycc_rgb_convert_sse2[];                /* jdcolss2.asm */
+extern const int jconst_merged_upsample_mmx[];         /* jdmermmx.asm */
+extern const int jconst_merged_upsample_sse2[];                /* jdmerss2.asm */
+extern const int jconst_fancy_upsample_mmx[];          /* jdsammmx.asm */
+extern const int jconst_fancy_upsample_sse2[];         /* jdsamss2.asm */
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+EXTERN(unsigned int) jpeg_simd_merged_upsampler JPP((j_decompress_ptr cinfo));
+#endif
diff --git a/jcolsamp.inc b/jcolsamp.inc
new file mode 100644 (file)
index 0000000..03f5dbd
--- /dev/null
@@ -0,0 +1,156 @@
+;
+; jcolsamp.inc - private declarations for color conversion & up/downsampling
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; Last Modified : January 5, 2006
+;
+; [TAB8]
+
+; --------------------------------------------------------------------------
+;
+; configuration check: BITS_IN_JSAMPLE==8 (8-bit sample values) is the only
+; valid setting on this SIMD extension.
+;
+%if BITS_IN_JSAMPLE != 8
+%error "Sorry, this SIMD code only copes with 8-bit sample values."
+%endif
+
+; Short forms of external names for systems with brain-damaged linkers.
+;
+%ifdef NEED_SHORT_EXTERNAL_NAMES
+%define jpeg_rgb_ycc_convert_mmx       jMRgbYccCnv     ; jccolmmx.asm
+%define jpeg_rgb_ycc_convert_sse2      jSRgbYccCnv     ; jccolss2.asm
+%define jpeg_h2v1_downsample_mmx       jM21Downsample  ; jcsammmx.asm
+%define jpeg_h2v2_downsample_mmx       jM22Downsample  ; jcsammmx.asm
+%define jpeg_h2v1_downsample_sse2      jS21Downsample  ; jcsamss2.asm
+%define jpeg_h2v2_downsample_sse2      jS22Downsample  ; jcsamss2.asm
+%define jpeg_ycc_rgb_convert_mmx       jMYccRgbCnv     ; jdcolmmx.asm
+%define jpeg_ycc_rgb_convert_sse2      jSYccRgbCnv     ; jdcolss2.asm
+%define jpeg_h2v1_merged_upsample_mmx  jM21MerUpsample ; jdmermmx.asm
+%define jpeg_h2v2_merged_upsample_mmx  jM22MerUpsample ; jdmermmx.asm
+%define jpeg_h2v1_merged_upsample_sse2 jS21MerUpsample ; jdmerss2.asm
+%define jpeg_h2v2_merged_upsample_sse2 jS22MerUpsample ; jdmerss2.asm
+%define jpeg_h2v1_fancy_upsample_mmx   jM21FanUpsample ; jdsammmx.asm
+%define jpeg_h2v2_fancy_upsample_mmx   jM22FanUpsample ; jdsammmx.asm
+%define jpeg_h1v2_fancy_upsample_mmx   jM12FanUpsample ; jdsammmx.asm
+%define jpeg_h2v1_upsample_mmx         jM21Upsample    ; jdsammmx.asm
+%define jpeg_h2v2_upsample_mmx         jM22Upsample    ; jdsammmx.asm
+%define jpeg_h2v1_fancy_upsample_sse2  jS21FanUpsample ; jdsamss2.asm
+%define jpeg_h2v2_fancy_upsample_sse2  jS22FanUpsample ; jdsamss2.asm
+%define jpeg_h1v2_fancy_upsample_sse2  jS12FanUpsample ; jdsamss2.asm
+%define jpeg_h2v1_upsample_sse2                jS21Upsample    ; jdsamss2.asm
+%define jpeg_h2v2_upsample_sse2                jS22Upsample    ; jdsamss2.asm
+%define jconst_rgb_ycc_convert_mmx     jMCRgbYccCnv    ; jccolmmx.asm
+%define jconst_rgb_ycc_convert_sse2    jSCRgbYccCnv    ; jccolss2.asm
+%define jconst_ycc_rgb_convert_mmx     jMCYccRgbCnv    ; jdcolmmx.asm
+%define jconst_ycc_rgb_convert_sse2    jSCYccRgbCnv    ; jdcolss2.asm
+%define jconst_merged_upsample_mmx     jMCMerUpsample  ; jdmermmx.asm
+%define jconst_merged_upsample_sse2    jSCMerUpsample  ; jdmerss2.asm
+%define jconst_fancy_upsample_mmx      jMCFanUpsample  ; jdsammmx.asm
+%define jconst_fancy_upsample_sse2     jSCFanUpsample  ; jdsamss2.asm
+%endif ; NEED_SHORT_EXTERNAL_NAMES
+
+; --------------------------------------------------------------------------
+
+; pseudo-registers to make ordering of RGB configurable
+;
+%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+%if RGB_RED < 0 || RGB_RED >= RGB_PIXELSIZE || RGB_GREEN < 0 || \
+   RGB_GREEN >= RGB_PIXELSIZE || RGB_BLUE < 0 || RGB_BLUE >= RGB_PIXELSIZE || \
+   RGB_RED == RGB_GREEN || RGB_GREEN == RGB_BLUE || RGB_RED == RGB_BLUE
+%error "Incorrect RGB pixel offset."
+%endif
+
+%if RGB_RED == 0
+%define  mmA  mm0
+%define  mmB  mm1
+%define xmmA xmm0
+%define xmmB xmm1
+%elif RGB_GREEN == 0
+%define  mmA  mm2
+%define  mmB  mm3
+%define xmmA xmm2
+%define xmmB xmm3
+%elif RGB_BLUE == 0
+%define  mmA  mm4
+%define  mmB  mm5
+%define xmmA xmm4
+%define xmmB xmm5
+%else
+%define  mmA  mm6
+%define  mmB  mm7
+%define xmmA xmm6
+%define xmmB xmm7
+%endif
+
+%if RGB_RED == 1
+%define  mmC  mm0
+%define  mmD  mm1
+%define xmmC xmm0
+%define xmmD xmm1
+%elif RGB_GREEN == 1
+%define  mmC  mm2
+%define  mmD  mm3
+%define xmmC xmm2
+%define xmmD xmm3
+%elif RGB_BLUE == 1
+%define  mmC  mm4
+%define  mmD  mm5
+%define xmmC xmm4
+%define xmmD xmm5
+%else
+%define  mmC  mm6
+%define  mmD  mm7
+%define xmmC xmm6
+%define xmmD xmm7
+%endif
+
+%if RGB_RED == 2
+%define  mmE  mm0
+%define  mmF  mm1
+%define xmmE xmm0
+%define xmmF xmm1
+%elif RGB_GREEN == 2
+%define  mmE  mm2
+%define  mmF  mm3
+%define xmmE xmm2
+%define xmmF xmm3
+%elif RGB_BLUE == 2
+%define  mmE  mm4
+%define  mmF  mm5
+%define xmmE xmm4
+%define xmmF xmm5
+%else
+%define  mmE  mm6
+%define  mmF  mm7
+%define xmmE xmm6
+%define xmmF xmm7
+%endif
+
+%if RGB_RED == 3
+%define  mmG  mm0
+%define  mmH  mm1
+%define xmmG xmm0
+%define xmmH xmm1
+%elif RGB_GREEN == 3
+%define  mmG  mm2
+%define  mmH  mm3
+%define xmmG xmm2
+%define xmmH xmm3
+%elif RGB_BLUE == 3
+%define  mmG  mm4
+%define  mmH  mm5
+%define xmmG xmm4
+%define xmmH xmm5
+%else
+%define  mmG  mm6
+%define  mmH  mm7
+%define xmmG xmm6
+%define xmmH xmm7
+%endif
+%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+
+; --------------------------------------------------------------------------
index 9b1fa7568a67a7c25a599976ada93c53db665c57..e4235c09cf45ed2efaaf24c9d9b5369d7a337b28 100644 (file)
--- a/jcomapi.c
+++ b/jcomapi.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : March 11, 2005
+ * ---------------------------------------------------------------------
+ *
  * This file contains application interface routines that are used for both
  * compression and decompression.
  */
@@ -104,3 +111,54 @@ jpeg_alloc_huff_table (j_common_ptr cinfo)
   tbl->sent_table = FALSE;     /* make sure this is false in any new table */
   return tbl;
 }
+
+
+/*
+ * SIMD Ext: Checking for support of SIMD instruction set.
+ *
+ * Returns an unsigned int of SIMD support flags.  The value of
+ * jpeg_simd_os_support(jpeg_simd_cpu_support()) is computed on the first
+ * call and cached in a function-local static; later calls reuse the cache.
+ *
+ * Unless JSIMD_MASKFUNC_NOT_SUPPORTED is defined, a non-NULL cinfo causes
+ * the bits returned by jpeg_simd_mask(cinfo, JSIMD_NONE, JSIMD_NONE) to be
+ * cleared from the result.  With cinfo == NULL the cached value is
+ * returned unmodified.
+ *
+ * NOTE(review): the cache is volatile but not otherwise synchronized;
+ * presumably concurrent first calls just repeat the probe -- confirm.
+ */
+
+GLOBAL(unsigned int)
+jpeg_simd_support (j_common_ptr cinfo)
+{
+  enum { JSIMD_INVALID = ~0 };  /* sentinel meaning "not probed yet" */
+  static volatile unsigned int simd_supported = JSIMD_INVALID;
+
+  if (simd_supported == JSIMD_INVALID)  /* first call: probe and cache */
+    simd_supported = jpeg_simd_os_support(jpeg_simd_cpu_support());
+
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+  if (cinfo != NULL)   /* Turn off the masked flags */
+    return simd_supported & ~jpeg_simd_mask(cinfo, JSIMD_NONE, JSIMD_NONE);
+#endif
+  return simd_supported;
+}
+
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+
+/*
+ * SIMD Ext: modify/retrieve SIMD instruction mask
+ *
+ * The mask lives in gp[0], the first unsigned long of the output_gamma
+ * field (decompressor) or the input_gamma field (compressor).  The field
+ * is accepted as a mask only if gp[1] is 0x3FF00000 or 0 and gp[0] has no
+ * bits outside JSIMD_ALL; otherwise 0 is returned and nothing is written.
+ *
+ * On success the return value is the mask as it was before this call.
+ * The stored mask becomes (oldmask & ~remove) | add, but only when
+ * neither remove nor add has bits outside JSIMD_ALL; if they do, the
+ * stored mask is left untouched and the old mask is still returned.
+ *
+ * cinfo must not be NULL: it is dereferenced unconditionally.
+ *
+ * NOTE(review): the gamma field is read through an unsigned long pointer.
+ * This presumably relies on the field being an 8-byte little-endian
+ * double and on unsigned long being 32 bits wide; where unsigned long is
+ * 64 bits, gp[1] would lie past the field.  It is also a type-punning
+ * access of the kind strict-aliasing optimizers may break -- confirm the
+ * supported targets and compiler flags.
+ */
+
+GLOBAL(unsigned int)
+jpeg_simd_mask (j_common_ptr cinfo, unsigned int remove, unsigned int add)
+{
+  unsigned long *gp;            /* view of the gamma field as two words */
+  unsigned int oldmask;
+
+  if (cinfo->is_decompressor)
+    gp = (unsigned long *) &((j_decompress_ptr) cinfo)->output_gamma;
+  else /* compressor */
+    gp = (unsigned long *) &((j_compress_ptr) cinfo)->input_gamma;
+
+  if ((gp[1] == 0x3FF00000 || gp[1] == 0x00000000) &&  /* +1.0 or +0.0 */
+      (gp[0] & ~JSIMD_ALL) == 0) {
+    oldmask = gp[0];
+    if (((remove | add) & ~JSIMD_ALL) == 0)  /* reject unknown flag bits */
+      gp[0] = (oldmask & ~remove) | add;
+  } else {
+    oldmask = 0;       /* error */
+  }
+  return oldmask;
+}
+
+#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */
diff --git a/jconfig.bc5 b/jconfig.bc5
new file mode 100644 (file)
index 0000000..50c309d
--- /dev/null
@@ -0,0 +1,48 @@
+/* jconfig.bc5 --- jconfig.h for Borland C++ Compiler 5.5 (win32) */
+/* see jconfig.doc for explanations */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS       /* we presume a 32-bit flat memory model */
+#undef NEED_SHORT_EXTERNAL_NAMES
+#undef INCOMPLETE_TYPES_BROKEN /* this assumes you have -w-stu in CFLAGS */
+
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#define TYPEDEF_UCHAR_BOOLEAN
+
+#ifdef JPEG_INTERNALS
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
+
+#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)
+#undef JSIMD_MMX_NOT_SUPPORTED
+#undef JSIMD_3DNOW_NOT_SUPPORTED
+#undef JSIMD_SSE_NOT_SUPPORTED
+#undef JSIMD_SSE2_NOT_SUPPORTED
+#endif
+
+#ifdef JPEG_CJPEG_DJPEG
+
+#define BMP_SUPPORTED          /* BMP image file format */
+#define GIF_SUPPORTED          /* GIF image file format */
+#define PPM_SUPPORTED          /* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED           /* Utah RLE image file format */
+#define TARGA_SUPPORTED                /* Targa image file format */
+
+#define TWO_FILE_COMMANDLINE
+#define USE_SETMODE            /* Borland has setmode() */
+#undef NEED_SIGNAL_CATCHER     /* Define this if you use jmemname.c */
+#undef DONT_USE_B_MODE
+#undef PROGRESS_REPORT         /* optional */
+
+#endif /* JPEG_CJPEG_DJPEG */
index 36a04fa8402f54a98ad62d1368ac584ac40e706f..147cb6bd4b3e49c9ce7c2eb01005a9ef156c794c 100644 (file)
@@ -16,6 +16,9 @@
 /* Define this if you get warnings about undefined structures. */
 #undef INCOMPLETE_TYPES_BROKEN
 
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#undef TYPEDEF_UCHAR_BOOLEAN
+
 #ifdef JPEG_INTERNALS
 
 #undef RIGHT_SHIFT_IS_UNSIGNED
 
 #endif /* JPEG_INTERNALS */
 
+#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)
+#undef JSIMD_MMX_NOT_SUPPORTED
+#undef JSIMD_3DNOW_NOT_SUPPORTED
+#undef JSIMD_SSE_NOT_SUPPORTED
+#undef JSIMD_SSE2_NOT_SUPPORTED
+#endif
+
 #ifdef JPEG_CJPEG_DJPEG
 
 #define BMP_SUPPORTED          /* BMP image file format */
@@ -35,6 +45,8 @@
 #define TARGA_SUPPORTED                /* Targa image file format */
 
 #undef TWO_FILE_COMMANDLINE
+#undef USE_SETMODE
+#undef USE_FDOPEN
 #undef NEED_SIGNAL_CATCHER
 #undef DONT_USE_B_MODE
 
index f759a9dbd6b18fc97171ca66a94e085a2821cde7..b5a2e47d4903745aade2b93e51c4fbadea80f3ec 100644 (file)
 
 #endif /* JPEG_INTERNALS */
 
+#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)
+#undef JSIMD_MMX_NOT_SUPPORTED
+#undef JSIMD_3DNOW_NOT_SUPPORTED
+#undef JSIMD_SSE_NOT_SUPPORTED
+#undef JSIMD_SSE2_NOT_SUPPORTED
+#endif
+
 #ifdef JPEG_CJPEG_DJPEG
 
 #define BMP_SUPPORTED          /* BMP image file format */
@@ -35,4 +42,6 @@
 #undef DONT_USE_B_MODE
 #undef PROGRESS_REPORT         /* optional */
 
+#define FREE_MEM_ESTIMATE      0       /* for alternate cjpeg/djpeg */
+
 #endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.linux b/jconfig.linux
new file mode 100644 (file)
index 0000000..6c38ed5
--- /dev/null
@@ -0,0 +1,44 @@
+/* jconfig.linux --- jconfig.h for Linux ELF with gcc */
+/* see jconfig.doc for explanations */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS
+#undef NEED_SHORT_EXTERNAL_NAMES
+#undef INCOMPLETE_TYPES_BROKEN
+
+#ifdef JPEG_INTERNALS
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
+
+#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)
+#undef JSIMD_MMX_NOT_SUPPORTED
+#undef JSIMD_3DNOW_NOT_SUPPORTED
+#undef JSIMD_SSE_NOT_SUPPORTED
+#undef JSIMD_SSE2_NOT_SUPPORTED
+#endif
+
+#ifdef JPEG_CJPEG_DJPEG
+
+#define BMP_SUPPORTED          /* BMP image file format */
+#define GIF_SUPPORTED          /* GIF image file format */
+#define PPM_SUPPORTED          /* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED           /* Utah RLE image file format */
+#define TARGA_SUPPORTED                /* Targa image file format */
+
+#undef TWO_FILE_COMMANDLINE
+#undef NEED_SIGNAL_CATCHER     /* Define this if you use jmemname.c */
+#undef DONT_USE_B_MODE
+#undef PROGRESS_REPORT         /* optional */
+
+#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.mgw b/jconfig.mgw
new file mode 100644 (file)
index 0000000..83dfe1d
--- /dev/null
@@ -0,0 +1,48 @@
+/* jconfig.mgw --- jconfig.h for MinGW */
+/* see jconfig.doc for explanations */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS
+#undef NEED_SHORT_EXTERNAL_NAMES
+#undef INCOMPLETE_TYPES_BROKEN
+
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#define TYPEDEF_UCHAR_BOOLEAN
+
+#ifdef JPEG_INTERNALS
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
+
+#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)
+#undef JSIMD_MMX_NOT_SUPPORTED
+#undef JSIMD_3DNOW_NOT_SUPPORTED
+#undef JSIMD_SSE_NOT_SUPPORTED
+#undef JSIMD_SSE2_NOT_SUPPORTED
+#endif
+
+#ifdef JPEG_CJPEG_DJPEG
+
+#define BMP_SUPPORTED          /* BMP image file format */
+#define GIF_SUPPORTED          /* GIF image file format */
+#define PPM_SUPPORTED          /* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED           /* Utah RLE image file format */
+#define TARGA_SUPPORTED                /* Targa image file format */
+
+#define TWO_FILE_COMMANDLINE   /* optional */
+#define USE_SETMODE            /* MinGW has setmode() */
+#undef NEED_SIGNAL_CATCHER     /* Define this if you use jmemname.c */
+#undef DONT_USE_B_MODE
+#undef PROGRESS_REPORT         /* optional */
+
+#endif /* JPEG_CJPEG_DJPEG */
index 7e291c75bd4ce2e5586c9bcf21bd0306d36caa5f..d5bc9f94672eeba86cee5a86605638de0abfd452 100644 (file)
 #undef INCOMPLETE_TYPES_BROKEN
 
 /* Define "boolean" as unsigned char, not int, per Windows custom */
-#ifndef __RPCNDR_H__           /* don't conflict if rpcndr.h already read */
-typedef unsigned char boolean;
-#endif
-#define HAVE_BOOLEAN           /* prevent jmorecfg.h from redefining it */
-
+#define TYPEDEF_UCHAR_BOOLEAN
 
 #ifdef JPEG_INTERNALS
 
@@ -28,6 +24,13 @@ typedef unsigned char boolean;
 
 #endif /* JPEG_INTERNALS */
 
+#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)
+#undef JSIMD_MMX_NOT_SUPPORTED
+#undef JSIMD_3DNOW_NOT_SUPPORTED
+#undef JSIMD_SSE_NOT_SUPPORTED
+#undef JSIMD_SSE2_NOT_SUPPORTED
+#endif
+
 #ifdef JPEG_CJPEG_DJPEG
 
 #define BMP_SUPPORTED          /* BMP image file format */
diff --git a/jcqnt3dn.asm b/jcqnt3dn.asm
new file mode 100644 (file)
index 0000000..8197858
--- /dev/null
@@ -0,0 +1,240 @@
+;
+; jcqnt3dn.asm - sample data conversion and quantization (3DNow! & MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 23, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+; (each pass of .convloop converts two 8-sample rows to FAST_FLOAT values)
+; GLOBAL(void)
+; jpeg_convsamp_flt_3dnow (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                          FAST_FLOAT * workspace);
+;
+
+%define sample_data    ebp+8           ; JSAMPARRAY sample_data
+%define start_col      ebp+12          ; JDIMENSION start_col
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+       align   16
+       global  EXTN(jpeg_convsamp_flt_3dnow)
+
+EXTN(jpeg_convsamp_flt_3dnow):
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       pcmpeqw  mm7,mm7                ; mm7 = all bits set
+       psllw    mm7,7                  ; each word = 0xFF80
+       packsswb mm7,mm7                ; mm7 = PB_CENTERJSAMPLE (0x808080..)
+
+       mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+       mov     eax, JDIMENSION [start_col]
+       mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE/2                  ; loop count: two rows per pass
+       alignx  16,7
+.convloop:
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+       mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+       movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
+       movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
+
+       psubb   mm0,mm7                         ; mm0=(01234567)
+       psubb   mm1,mm7                         ; mm1=(89ABCDEF)
+
+       punpcklbw mm2,mm0                       ; mm2=(*0*1*2*3)
+       punpckhbw mm0,mm0                       ; mm0=(*4*5*6*7)
+       punpcklbw mm3,mm1                       ; mm3=(*8*9*A*B)
+       punpckhbw mm1,mm1                       ; mm1=(*C*D*E*F)
+
+       punpcklwd mm4,mm2                       ; mm4=(***0***1)
+       punpckhwd mm2,mm2                       ; mm2=(***2***3)
+       punpcklwd mm5,mm0                       ; mm5=(***4***5)
+       punpckhwd mm0,mm0                       ; mm0=(***6***7)
+
+       psrad   mm4,(DWORD_BIT-BYTE_BIT)        ; mm4=(01) (arithmetic shift sign-extends the top byte)
+       psrad   mm2,(DWORD_BIT-BYTE_BIT)        ; mm2=(23)
+       pi2fd   mm4,mm4                         ; int32 -> float
+       pi2fd   mm2,mm2
+       psrad   mm5,(DWORD_BIT-BYTE_BIT)        ; mm5=(45)
+       psrad   mm0,(DWORD_BIT-BYTE_BIT)        ; mm0=(67)
+       pi2fd   mm5,mm5
+       pi2fd   mm0,mm0
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2
+       movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
+       movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+
+       punpcklwd mm6,mm3                       ; mm6=(***8***9)
+       punpckhwd mm3,mm3                       ; mm3=(***A***B)
+       punpcklwd mm4,mm1                       ; mm4=(***C***D)
+       punpckhwd mm1,mm1                       ; mm1=(***E***F)
+
+       psrad   mm6,(DWORD_BIT-BYTE_BIT)        ; mm6=(89)
+       psrad   mm3,(DWORD_BIT-BYTE_BIT)        ; mm3=(AB)
+       pi2fd   mm6,mm6
+       pi2fd   mm3,mm3
+       psrad   mm4,(DWORD_BIT-BYTE_BIT)        ; mm4=(CD)
+       psrad   mm1,(DWORD_BIT-BYTE_BIT)        ; mm1=(EF)
+       pi2fd   mm4,mm4
+       pi2fd   mm1,mm1
+
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3
+       movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4
+       movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
+
+       add     esi, byte 2*SIZEOF_JSAMPROW             ; next pair of row pointers
+       add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; next two workspace rows
+       dec     ecx
+       jnz     near .convloop
+
+       femms           ; empty MMX/3DNow! state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+; (workspace[i] * divisors[i] converted to JCOEF, 16 coefficients per pass)
+; GLOBAL(void)
+; jpeg_quantize_flt_3dnow (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                          FAST_FLOAT * workspace);
+; Adding rndint_magic leaves each integer result in the low word of its dword.
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; FAST_FLOAT * divisors
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+       align   16
+       global  EXTN(jpeg_quantize_flt_3dnow)
+
+EXTN(jpeg_quantize_flt_3dnow):
+       push    ebp
+       mov     ebp,esp
+;      push    ebx             ; unused
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov       eax, 0x4B400000       ; (float)0x00C00000 (rndint_magic)
+       movd      mm7,eax
+       punpckldq mm7,mm7               ; mm7={12582912.0F 12582912.0F}
+
+       mov     esi, POINTER [workspace]
+       mov     edx, POINTER [divisors]
+       mov     edi, JCOEFPTR [coef_block]
+       mov     eax, DCTSIZE2/16        ; loop count: 16 coefficients per pass
+       alignx  16,7
+.quantloop:
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+       pfmul   mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+       pfmul   mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
+       pfmul   mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+       pfmul   mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+
+       pfadd   mm0,mm7                 ; mm0=(00 ** 01 **)
+       pfadd   mm1,mm7                 ; mm1=(02 ** 03 **)
+       pfadd   mm2,mm7                 ; mm2=(04 ** 05 **)
+       pfadd   mm3,mm7                 ; mm3=(06 ** 07 **)
+
+       movq      mm4,mm0
+       punpcklwd mm0,mm1               ; mm0=(00 02 ** **)
+       punpckhwd mm4,mm1               ; mm4=(01 03 ** **)
+       movq      mm5,mm2
+       punpcklwd mm2,mm3               ; mm2=(04 06 ** **)
+       punpckhwd mm5,mm3               ; mm5=(05 07 ** **)
+
+       punpcklwd mm0,mm4               ; mm0=(00 01 02 03)
+       punpcklwd mm2,mm5               ; mm2=(04 05 06 07)
+
+       movq    mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+       pfmul   mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+       pfmul   mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)]
+       pfmul   mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+       pfmul   mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+
+       pfadd   mm6,mm7                 ; mm6=(10 ** 11 **)
+       pfadd   mm1,mm7                 ; mm1=(12 ** 13 **)
+       pfadd   mm3,mm7                 ; mm3=(14 ** 15 **)
+       pfadd   mm4,mm7                 ; mm4=(16 ** 17 **)
+
+       movq      mm5,mm6
+       punpcklwd mm6,mm1               ; mm6=(10 12 ** **)
+       punpckhwd mm5,mm1               ; mm5=(11 13 ** **)
+       movq      mm1,mm3
+       punpcklwd mm3,mm4               ; mm3=(14 16 ** **)
+       punpckhwd mm1,mm4               ; mm1=(15 17 ** **)
+
+       punpcklwd mm6,mm5               ; mm6=(10 11 12 13)
+       punpcklwd mm3,mm1               ; mm3=(14 15 16 17)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
+
+       add     esi, byte 16*SIZEOF_FAST_FLOAT  ; advance workspace
+       add     edx, byte 16*SIZEOF_FAST_FLOAT  ; advance divisors
+       add     edi, byte 16*SIZEOF_JCOEF       ; advance coef_block
+       dec     eax
+       jnz     near .quantloop
+
+       femms           ; empty MMX/3DNow! state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+%endif ; JFDCT_FLT_3DNOW_MMX_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jcqntflt.asm b/jcqntflt.asm
new file mode 100644 (file)
index 0000000..4631a06
--- /dev/null
@@ -0,0 +1,202 @@
+;
+; jcqntflt.asm - sample data conversion and quantization (non-SIMD, FP)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : March 21, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+; (scalar x87 version: one 8-sample row per pass of .convloop)
+; GLOBAL(void)
+; jpeg_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                      FAST_FLOAT * workspace);
+;
+
+%define sample_data    ebp+8           ; JSAMPARRAY sample_data
+%define start_col      ebp+12          ; JDIMENSION start_col
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+       align   16
+       global  EXTN(jpeg_convsamp_float)
+
+EXTN(jpeg_convsamp_float):
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+       mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE                    ; loop count: one row per pass
+       alignx  16,7
+.convloop:
+       mov     ebx, JSAMPROW [esi]             ; (JSAMPLE *)
+       add     ebx, JDIMENSION [start_col]     ; ebx = row pointer + start_col
+
+%assign i 0    ; i=0
+%rep 4 ; -- repeat 4 times ---
+       xor     eax,eax
+       xor     edx,edx
+       mov     al, JSAMPLE [ebx+(i+0)*SIZEOF_JSAMPLE]
+       mov     dl, JSAMPLE [ebx+(i+1)*SIZEOF_JSAMPLE]
+       add     eax, byte -CENTERJSAMPLE        ; zero-extended sample minus CENTERJSAMPLE
+       add     edx, byte -CENTERJSAMPLE
+       push    eax                             ; stage the value on the stack for fild
+       push    edx
+%assign i i+2  ; i+=2
+%endrep        ; -- repeat end ---
+
+       fild    INT32 [esp+0*SIZEOF_INT32]      ; last value pushed (sample 7)
+       fild    INT32 [esp+1*SIZEOF_INT32]
+       fild    INT32 [esp+2*SIZEOF_INT32]
+       fild    INT32 [esp+3*SIZEOF_INT32]
+       fild    INT32 [esp+4*SIZEOF_INT32]
+       fild    INT32 [esp+5*SIZEOF_INT32]
+       fild    INT32 [esp+6*SIZEOF_INT32]
+       fild    INT32 [esp+7*SIZEOF_INT32]      ; first value pushed (sample 0), now st0
+
+       add     esp, byte DCTSIZE*SIZEOF_INT32  ; drop the 8 staged values
+
+       fstp    FAST_FLOAT [edi+0*SIZEOF_FAST_FLOAT]    ; st0 is sample 0, so stores run in ascending order
+       fstp    FAST_FLOAT [edi+1*SIZEOF_FAST_FLOAT]
+       fstp    FAST_FLOAT [edi+2*SIZEOF_FAST_FLOAT]
+       fstp    FAST_FLOAT [edi+3*SIZEOF_FAST_FLOAT]
+       fstp    FAST_FLOAT [edi+4*SIZEOF_FAST_FLOAT]
+       fstp    FAST_FLOAT [edi+5*SIZEOF_FAST_FLOAT]
+       fstp    FAST_FLOAT [edi+6*SIZEOF_FAST_FLOAT]
+       fstp    FAST_FLOAT [edi+7*SIZEOF_FAST_FLOAT]
+
+       add     esi, byte SIZEOF_JSAMPROW               ; next row pointer
+       add     edi, byte DCTSIZE*SIZEOF_FAST_FLOAT     ; next workspace row
+       dec     ecx
+       jnz     near .convloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+; (scalar x87 version: workspace[i] * divisors[i] stored as JCOEF, 8 per pass)
+; GLOBAL(void)
+; jpeg_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                      FAST_FLOAT * workspace);
+;
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; FAST_FLOAT * divisors
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+%define FLT_ROUNDS     1               ; from <float.h>
+
+       align   16
+       global  EXTN(jpeg_quantize_float)
+
+EXTN(jpeg_quantize_float):
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; unused
+;      push    edx             ; unused
+       push    esi
+       push    edi
+
+%if (FLT_ROUNDS != 1)  ; not assembled while FLT_ROUNDS is defined as 1 above
+       push    eax                     ; reserve a stack slot for the control word
+       fnstcw  word [esp]              ; save the current FPU control word there
+       mov     eax, [esp]
+       and     eax, (~0x0C00)          ; round to nearest integer
+       push    eax
+       fldcw   word [esp]              ; load the modified control word
+       pop     eax                     ; saved original stays on top of the stack
+%endif
+       mov     esi, POINTER [workspace]
+       mov     ebx, POINTER [divisors]
+       mov     edi, JCOEFPTR [coef_block]
+       mov     eax, DCTSIZE2/8         ; loop count: 8 coefficients per pass
+       alignx  16,7
+.quantloop:
+       fld     FAST_FLOAT [esi+0*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+0*SIZEOF_FAST_FLOAT]
+       fld     FAST_FLOAT [esi+1*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+1*SIZEOF_FAST_FLOAT]
+       fld     FAST_FLOAT [esi+2*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+2*SIZEOF_FAST_FLOAT]
+       fld     FAST_FLOAT [esi+3*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+3*SIZEOF_FAST_FLOAT]
+
+       fld     FAST_FLOAT [esi+4*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+4*SIZEOF_FAST_FLOAT]
+       fxch    st0,st1                 ; the four fxch steps reorder the stack ...
+       fld     FAST_FLOAT [esi+5*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+5*SIZEOF_FAST_FLOAT]
+       fxch    st0,st3
+       fld     FAST_FLOAT [esi+6*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+6*SIZEOF_FAST_FLOAT]
+       fxch    st0,st5
+       fld     FAST_FLOAT [esi+7*SIZEOF_FAST_FLOAT]
+       fmul    FAST_FLOAT [ebx+7*SIZEOF_FAST_FLOAT]
+       fxch    st0,st7                 ; ... so that st0..st7 = products 0..7
+
+       fistp   JCOEF [edi+0*SIZEOF_JCOEF]      ; float -> integer store, pops st0
+       fistp   JCOEF [edi+1*SIZEOF_JCOEF]
+       fistp   JCOEF [edi+2*SIZEOF_JCOEF]
+       fistp   JCOEF [edi+3*SIZEOF_JCOEF]
+       fistp   JCOEF [edi+4*SIZEOF_JCOEF]
+       fistp   JCOEF [edi+5*SIZEOF_JCOEF]
+       fistp   JCOEF [edi+6*SIZEOF_JCOEF]
+       fistp   JCOEF [edi+7*SIZEOF_JCOEF]
+
+       add     esi, byte 8*SIZEOF_FAST_FLOAT   ; advance workspace
+       add     ebx, byte 8*SIZEOF_FAST_FLOAT   ; advance divisors
+       add     edi, byte 8*SIZEOF_JCOEF        ; advance coef_block
+       dec     eax
+       jnz     short .quantloop
+
+%if (FLT_ROUNDS != 1)  ; restore the control word saved at entry
+       fldcw   word [esp]
+       pop     eax             ; pop old control word
+%endif
+       pop     edi
+       pop     esi
+;      pop     edx             ; unused
+;      pop     ecx             ; unused
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jcqntint.asm b/jcqntint.asm
new file mode 100644 (file)
index 0000000..e0de1cb
--- /dev/null
@@ -0,0 +1,243 @@
+;
+; jcqntint.asm - sample data conversion and quantization (non-SIMD, integer)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 27, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_int (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                    DCTELEM * workspace);
+;
+
+%define sample_data    ebp+8           ; JSAMPARRAY sample_data
+%define start_col      ebp+12          ; JDIMENSION start_col
+%define workspace      ebp+16          ; DCTELEM * workspace
+
+       align   16
+       global  EXTN(jpeg_convsamp_int)
+
+EXTN(jpeg_convsamp_int):       ; copy an 8x8 block of samples into workspace as zero-centered DCTELEMs
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+       mov     edi, POINTER [workspace]        ; (DCTELEM *)
+       mov     ecx, DCTSIZE                    ; ecx = number of rows left to convert
+       alignx  16,7
+.convloop:
+       mov     ebx, JSAMPROW [esi]             ; (JSAMPLE *)
+       add     ebx, JDIMENSION [start_col]     ; ebx -> sample at start_col in this row
+
+%assign i 0    ; i=0
+%rep 4 ; -- repeat 4 times --- (2 samples per repetition = one 8-sample row)
+       xor     eax,eax                 ; clear so the byte loads below end up zero-extended
+       xor     edx,edx
+       mov     al, JSAMPLE [ebx+(i+0)*SIZEOF_JSAMPLE]
+       mov     dl, JSAMPLE [ebx+(i+1)*SIZEOF_JSAMPLE]
+       add     eax, byte -CENTERJSAMPLE        ; unsigned -> signed (subtract the center value)
+       add     edx, byte -CENTERJSAMPLE
+       mov     DCTELEM [edi+(i+0)*SIZEOF_DCTELEM], ax  ; only the low word is stored
+       mov     DCTELEM [edi+(i+1)*SIZEOF_DCTELEM], dx
+%assign i i+2  ; i+=2
+%endrep        ; -- repeat end ---
+
+       add     esi, byte SIZEOF_JSAMPROW       ; advance to the next row pointer
+       add     edi, byte DCTSIZE*SIZEOF_DCTELEM        ; advance to the next workspace row
+       dec     ecx
+       jnz     short .convloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jpeg_quantize_int (JCOEFPTR coef_block, DCTELEM * divisors,
+;                    DCTELEM * workspace);
+;
+
+%define RECIPROCAL(i,b)        ((b)+((i)+DCTSIZE2*0)*SIZEOF_DCTELEM)
+%define CORRECTION(i,b)        ((b)+((i)+DCTSIZE2*1)*SIZEOF_DCTELEM)
+%define SHIFT(i,b)     ((b)+((i)+DCTSIZE2*3)*SIZEOF_DCTELEM)
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; DCTELEM * divisors
+%define workspace      ebp+16          ; DCTELEM * workspace
+
+%define UNROLL 2
+
+       align   16
+       global  EXTN(jpeg_quantize_int)
+
+EXTN(jpeg_quantize_int):       ; quantize DCTSIZE2 workspace coefficients by reciprocal multiply + shift
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, POINTER [workspace]
+       mov     ebx, POINTER [divisors]         ; base of the RECIPROCAL/CORRECTION/SHIFT tables
+       mov     edi, JCOEFPTR [coef_block]
+       mov     ecx, DCTSIZE2/UNROLL            ; loop count (UNROLL coefficients per pass)
+       alignx  16,7
+.quantloop:
+       push    ecx                     ; save the loop counter: cx/cl are used as scratch below
+
+%assign i 0    ; i=0;
+%rep UNROLL    ; ---- repeat (UNROLL) times ----
+       mov     cx, DCTELEM [esi+(i)*SIZEOF_DCTELEM]
+       mov     ax,cx
+       sar     cx,(WORD_BIT-1)         ; cx = sign mask of the coefficient
+       xor     ax,cx           ; if (ax < 0) ax = -ax;
+       sub     ax,cx
+       add     ax, DCTELEM [CORRECTION(i,ebx)] ; correction + roundfactor
+       shl     ax,1
+       mul     DCTELEM [RECIPROCAL(i,ebx)]     ; reciprocal (dx:ax = ax * reciprocal; only dx is used)
+       mov     ax,cx                   ; ax = sign mask (frees cx for the shift count)
+       mov     cx, DCTELEM [SHIFT(i,ebx)]      ; shift
+       shr     dx,cl                   ; descale the high word of the product
+       xor     dx,ax                   ; reapply the sign of the original coefficient
+       sub     dx,ax
+       mov     JCOEF [edi+(i)*SIZEOF_JCOEF], dx
+%assign i i+1  ; i++;
+%endrep                ; ---- repeat end ----
+
+       pop     ecx                     ; restore the loop counter
+
+       add     esi, byte UNROLL*SIZEOF_DCTELEM
+       add     ebx, byte UNROLL*SIZEOF_DCTELEM
+       add     edi, byte UNROLL*SIZEOF_JCOEF
+       dec     ecx
+       jnz     .quantloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%else ; JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jpeg_quantize_idiv (JCOEFPTR coef_block, DCTELEM * divisors,
+;                     DCTELEM * workspace);
+;
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; DCTELEM * divisors
+%define workspace      ebp+16          ; DCTELEM * workspace
+
+       align   16
+       global  EXTN(jpeg_quantize_idiv)
+
+EXTN(jpeg_quantize_idiv):      ; quantize DCTSIZE2 workspace coefficients using an explicit DIV per coefficient
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, POINTER [workspace]
+       mov     ebx, POINTER [divisors]
+       mov     edi, JCOEFPTR [coef_block]
+       mov     ecx, DCTSIZE2                   ; one coefficient per iteration
+       alignx  16,7
+.quantloop:
+       push    ecx                     ; save the loop counter: ecx is used as scratch below
+
+       movsx   ecx, DCTELEM [esi]      ; temp
+       mov     eax,ecx
+       sar     ecx,(DWORD_BIT-1)       ; ecx = sign mask of temp
+       xor     edx,edx
+       mov     dx, DCTELEM [ebx]       ; qval
+       xor     eax,ecx                 ; if (eax < 0) eax = -eax;
+       shr     edx,1                   ; edx = qval/2
+       sub     eax,ecx
+       cmp     eax,edx                 ; |temp| < qval/2 implies (|temp| + qval/2) / qval == 0
+       jge     short .quant
+       ; ---- if the quantized coefficient is zero
+       xor     eax,eax
+       jmp     short .output
+       alignx  16,7
+.quant:        ; ---- do quantization
+       add     eax,edx                 ; eax = |temp| + qval/2 (round to nearest)
+       xor     edx,edx                 ; dx:ax is the dividend of the 16-bit DIV below
+       div     DCTELEM [ebx]           ; Q:ax,R:dx
+       xor     ax,cx                   ; reapply the sign of temp to the quotient
+       sub     ax,cx
+       alignx  16,7
+.output:
+       mov     JCOEF [edi], ax
+
+       pop     ecx                     ; restore the loop counter
+
+       add     esi, byte SIZEOF_DCTELEM
+       add     ebx, byte SIZEOF_DCTELEM
+       add     edi, byte SIZEOF_JCOEF
+       dec     ecx
+       jnz     short .quantloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; !JFDCT_INT_QUANTIZE_WITH_DIVISION
diff --git a/jcqntmmx.asm b/jcqntmmx.asm
new file mode 100644 (file)
index 0000000..9cdf584
--- /dev/null
@@ -0,0 +1,254 @@
+;
+; jcqntmmx.asm - sample data conversion and quantization (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 27, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef JFDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_int_mmx (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                        DCTELEM * workspace);
+;
+
+%define sample_data    ebp+8           ; JSAMPARRAY sample_data
+%define start_col      ebp+12          ; JDIMENSION start_col
+%define workspace      ebp+16          ; DCTELEM * workspace
+
+       align   16
+       global  EXTN(jpeg_convsamp_int_mmx)
+
+EXTN(jpeg_convsamp_int_mmx):   ; MMX: copy an 8x8 block of samples into workspace as zero-centered DCTELEMs
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       pxor    mm6,mm6                 ; mm6=(all 0's)
+       pcmpeqw mm7,mm7                 ; mm7=(all 1's)
+       psllw   mm7,7                   ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+
+       mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+       mov     eax, JDIMENSION [start_col]
+       mov     edi, POINTER [workspace]        ; (DCTELEM *)
+       mov     ecx, DCTSIZE/4                  ; loop count (4 rows per iteration)
+       alignx  16,7
+.convloop:
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+       mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+       movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; mm0=(01234567)
+       movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; mm1=(89ABCDEF)
+
+       mov     ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+       mov     edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+       movq    mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; mm2=(GHIJKLMN)
+       movq    mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; mm3=(OPQRSTUV)
+
+       movq      mm4,mm0
+       punpcklbw mm0,mm6               ; mm0=(0123)
+       punpckhbw mm4,mm6               ; mm4=(4567)
+       movq      mm5,mm1
+       punpcklbw mm1,mm6               ; mm1=(89AB)
+       punpckhbw mm5,mm6               ; mm5=(CDEF)
+
+       paddw   mm0,mm7                 ; add 0xFF80 (= -128) to each word: unsigned -> signed
+       paddw   mm4,mm7
+       paddw   mm1,mm7
+       paddw   mm5,mm7
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5
+
+       movq      mm0,mm2
+       punpcklbw mm2,mm6               ; mm2=(GHIJ)
+       punpckhbw mm0,mm6               ; mm0=(KLMN)
+       movq      mm4,mm3
+       punpcklbw mm3,mm6               ; mm3=(OPQR)
+       punpckhbw mm4,mm6               ; mm4=(STUV)
+
+       paddw   mm2,mm7                 ; same centering for the second pair of rows
+       paddw   mm0,mm7
+       paddw   mm3,mm7
+       paddw   mm4,mm7
+
+       movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2
+       movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0
+       movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3
+       movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4
+
+       add     esi, byte 4*SIZEOF_JSAMPROW             ; next group of 4 row pointers
+       add     edi, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; next 4 workspace rows
+       dec     ecx
+       jnz     short .convloop
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jpeg_quantize_int_mmx (JCOEFPTR coef_block, DCTELEM * divisors,
+;                        DCTELEM * workspace);
+;
+
+%define RECIPROCAL(m,n,b) MMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
+%define CORRECTION(m,n,b) MMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
+%define SCALE(m,n,b)      MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; DCTELEM * divisors
+%define workspace      ebp+16          ; DCTELEM * workspace
+
+       align   16
+       global  EXTN(jpeg_quantize_int_mmx)
+
+EXTN(jpeg_quantize_int_mmx):   ; MMX: quantize DCTSIZE2 coefficients via reciprocal and scale multiplies
+       push    ebp
+       mov     ebp,esp
+;      push    ebx             ; unused
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, POINTER [workspace]
+       mov     edx, POINTER [divisors]         ; base of the RECIPROCAL/CORRECTION/SCALE tables
+       mov     edi, JCOEFPTR [coef_block]
+       mov     ah, 2                           ; outer loop count
+       alignx  16,7
+.quantloop1:
+       mov     al, DCTSIZE2/8/2                ; inner loop count (8 coefficients per pass)
+       alignx  16,7
+.quantloop2:
+       movq    mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
+       movq    mm0,mm2
+       movq    mm1,mm3
+       psraw   mm2,(WORD_BIT-1)        ; mm2,mm3 = per-word sign masks, kept until the end
+       psraw   mm3,(WORD_BIT-1)
+       pxor    mm0,mm2
+       pxor    mm1,mm3
+       psubw   mm0,mm2         ; if (mm0 < 0) mm0 = -mm0;
+       psubw   mm1,mm3         ; if (mm1 < 0) mm1 = -mm1;
+
+       ; unsigned long unsigned_multiply(unsigned short x, unsigned short y)
+       ; {
+       ;   enum { SHORT_BIT = 16 };
+       ;   signed short sx = (signed short) x;
+       ;   signed short sy = (signed short) y;
+       ;   signed long sz;
+       ; 
+       ;   sz = (long) sx * (long) sy;     /* signed multiply */
+       ; 
+       ;   if (sx < 0) sz += (long) sy << SHORT_BIT;
+       ;   if (sy < 0) sz += (long) sx << SHORT_BIT;
+       ; 
+       ;   return (unsigned long) sz;
+       ; }
+
+       paddw   mm0, MMWORD [CORRECTION(0,0,edx)]   ; correction + roundfactor
+       paddw   mm1, MMWORD [CORRECTION(0,1,edx)]
+       psllw   mm0,1
+       psllw   mm1,1
+       movq    mm4,mm0                 ; keep the multiplicand for the fix-up add below
+       movq    mm5,mm1
+       pmulhw  mm0, MMWORD [RECIPROCAL(0,0,edx)]   ; reciprocal
+       pmulhw  mm1, MMWORD [RECIPROCAL(0,1,edx)]
+       movq    mm6, MMWORD [SCALE(0,0,edx)]    ; scale
+       movq    mm7, MMWORD [SCALE(0,1,edx)]
+       paddw   mm0,mm4         ; reciprocal is always negative (MSB=1)
+       paddw   mm1,mm5
+       psllw   mm0,1
+       psllw   mm1,1
+       movq    mm4,mm0                 ; mm4,mm5 = x for the second (scale) multiply
+       movq    mm5,mm1
+       pmulhw  mm0,mm6                 ; signed high-word multiply by scale
+       pmulhw  mm1,mm7
+       psraw   mm6,(WORD_BIT-1)        ; sign mask of scale
+       psraw   mm7,(WORD_BIT-1)
+       pand    mm6,mm4
+       pand    mm7,mm5
+       paddw   mm0,mm6                 ; if (scale < 0) result += x   (see unsigned_multiply above)
+       paddw   mm1,mm7
+       psraw   mm4,(WORD_BIT-1)        ; sign mask of x
+       psraw   mm5,(WORD_BIT-1)
+       pand    mm4, MMWORD [SCALE(0,0,edx)]    ; scale
+       pand    mm5, MMWORD [SCALE(0,1,edx)]
+       paddw   mm0,mm4                 ; if (x < 0) result += scale
+       paddw   mm1,mm5
+
+       pxor    mm0,mm2                 ; reapply the sign of the original coefficient
+       pxor    mm1,mm3
+       psubw   mm0,mm2
+       psubw   mm1,mm3
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0   ; NOTE(review): addressed with SIZEOF_DCTELEM but edi advances by SIZEOF_JCOEF - presumably equal; confirm
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1
+
+       add     esi, byte 8*SIZEOF_DCTELEM
+       add     edx, byte 8*SIZEOF_DCTELEM
+       add     edi, byte 8*SIZEOF_JCOEF
+       dec     al
+       jnz     near .quantloop2
+       dec     ah
+       jnz     near .quantloop1        ; to avoid branch misprediction
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+%endif ; !JFDCT_INT_QUANTIZE_WITH_DIVISION
+%endif ; JFDCT_INT_MMX_SUPPORTED
diff --git a/jcqnts2f.asm b/jcqnts2f.asm
new file mode 100644 (file)
index 0000000..faf663e
--- /dev/null
@@ -0,0 +1,178 @@
+;
+; jcqnts2f.asm - sample data conversion and quantization (SSE & SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 18, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_flt_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                         FAST_FLOAT * workspace);
+;
+
+%define sample_data    ebp+8           ; JSAMPARRAY sample_data
+%define start_col      ebp+12          ; JDIMENSION start_col
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+       align   16
+       global  EXTN(jpeg_convsamp_flt_sse2)
+
+EXTN(jpeg_convsamp_flt_sse2):  ; SSE2: copy an 8x8 block of samples into workspace as zero-centered floats
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       pcmpeqw  xmm7,xmm7              ; xmm7=(all 1's)
+       psllw    xmm7,7                 ; each word = 0xFF80
+       packsswb xmm7,xmm7              ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
+
+       mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+       mov     eax, JDIMENSION [start_col]
+       mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE/2                  ; loop count (2 rows per iteration)
+       alignx  16,7
+.convloop:
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+       mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+       movq    xmm0, _MMWORD [ebx+eax*SIZEOF_JSAMPLE]  ; load 8 samples of the first row
+       movq    xmm1, _MMWORD [edx+eax*SIZEOF_JSAMPLE]  ; load 8 samples of the second row
+
+       psubb   xmm0,xmm7                       ; xmm0=(01234567)
+       psubb   xmm1,xmm7                       ; xmm1=(89ABCDEF)
+
+       punpcklbw xmm0,xmm0                     ; xmm0=(*0*1*2*3*4*5*6*7)
+       punpcklbw xmm1,xmm1                     ; xmm1=(*8*9*A*B*C*D*E*F)
+
+       punpcklwd xmm2,xmm0                     ; xmm2=(***0***1***2***3)
+       punpckhwd xmm0,xmm0                     ; xmm0=(***4***5***6***7)
+       punpcklwd xmm3,xmm1                     ; xmm3=(***8***9***A***B)
+       punpckhwd xmm1,xmm1                     ; xmm1=(***C***D***E***F)
+
+       psrad     xmm2,(DWORD_BIT-BYTE_BIT)     ; xmm2=(0123)  (arithmetic shift drops the '*' bytes)
+       psrad     xmm0,(DWORD_BIT-BYTE_BIT)     ; xmm0=(4567)
+       cvtdq2ps  xmm2,xmm2                     ; xmm2=(0123)  (now single-precision floats)
+       cvtdq2ps  xmm0,xmm0                     ; xmm0=(4567)
+       psrad     xmm3,(DWORD_BIT-BYTE_BIT)     ; xmm3=(89AB)
+       psrad     xmm1,(DWORD_BIT-BYTE_BIT)     ; xmm1=(CDEF)
+       cvtdq2ps  xmm3,xmm3                     ; xmm3=(89AB)
+       cvtdq2ps  xmm1,xmm1                     ; xmm1=(CDEF)
+
+       movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2     ; movaps: workspace must be 16-byte aligned
+       movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+       movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+       movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+
+       add     esi, byte 2*SIZEOF_JSAMPROW             ; next pair of row pointers
+       add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; next 2 workspace rows
+       dec     ecx
+       jnz     short .convloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jpeg_quantize_flt_sse2 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                         FAST_FLOAT * workspace);
+;
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; FAST_FLOAT * divisors
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+       align   16
+       global  EXTN(jpeg_quantize_flt_sse2)
+
+EXTN(jpeg_quantize_flt_sse2):  ; SSE2: multiply float workspace by the divisors table and store as 16-bit JCOEFs
+       push    ebp
+       mov     ebp,esp
+;      push    ebx             ; unused
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, POINTER [workspace]
+       mov     edx, POINTER [divisors]
+       mov     edi, JCOEFPTR [coef_block]
+       mov     eax, DCTSIZE2/16                ; loop count (16 coefficients per iteration)
+       alignx  16,7
+.quantloop:
+       movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+       mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]     ; note: the "divisors" entries are multiplied in
+       mulps   xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+       mulps   xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+       mulps   xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+       cvtps2dq xmm0,xmm0              ; float -> int32, rounded per the current MXCSR rounding mode
+       cvtps2dq xmm1,xmm1
+       cvtps2dq xmm2,xmm2
+       cvtps2dq xmm3,xmm3
+
+       packssdw xmm0,xmm1              ; int32 -> int16 with signed saturation
+       packssdw xmm2,xmm3
+
+       movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0  ; movdqa: coef_block must be 16-byte aligned
+       movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2
+
+       add     esi, byte 16*SIZEOF_FAST_FLOAT
+       add     edx, byte 16*SIZEOF_FAST_FLOAT
+       add     edi, byte 16*SIZEOF_JCOEF
+       dec     eax
+       jnz     short .quantloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+%endif ; JFDCT_FLT_SSE_SSE2_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jcqnts2i.asm b/jcqnts2i.asm
new file mode 100644 (file)
index 0000000..71bae2c
--- /dev/null
@@ -0,0 +1,216 @@
+;
+; jcqnts2i.asm - sample data conversion and quantization (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 27, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef JFDCT_INT_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_int_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                         DCTELEM * workspace);
+;
+
+%define sample_data    ebp+8           ; JSAMPARRAY sample_data
+%define start_col      ebp+12          ; JDIMENSION start_col
+%define workspace      ebp+16          ; DCTELEM * workspace
+
+       align   16
+       global  EXTN(jpeg_convsamp_int_sse2)
+
+EXTN(jpeg_convsamp_int_sse2):  ; SSE2: copy an 8x8 block of samples into workspace as zero-centered DCTELEMs
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       pxor    xmm6,xmm6               ; xmm6=(all 0's)
+       pcmpeqw xmm7,xmm7               ; xmm7=(all 1's)
+       psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+       mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+       mov     eax, JDIMENSION [start_col]
+       mov     edi, POINTER [workspace]        ; (DCTELEM *)
+       mov     ecx, DCTSIZE/4                  ; loop count (4 rows per iteration)
+       alignx  16,7
+.convloop:
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+       mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+       movq    xmm0, _MMWORD [ebx+eax*SIZEOF_JSAMPLE]  ; xmm0=(01234567)
+       movq    xmm1, _MMWORD [edx+eax*SIZEOF_JSAMPLE]  ; xmm1=(89ABCDEF)
+
+       mov     ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+       mov     edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+       movq    xmm2, _MMWORD [ebx+eax*SIZEOF_JSAMPLE]  ; xmm2=(GHIJKLMN)
+       movq    xmm3, _MMWORD [edx+eax*SIZEOF_JSAMPLE]  ; xmm3=(OPQRSTUV)
+
+       punpcklbw xmm0,xmm6             ; xmm0=(01234567)
+       punpcklbw xmm1,xmm6             ; xmm1=(89ABCDEF)
+       paddw     xmm0,xmm7             ; add 0xFF80 (= -128) to each word: unsigned -> signed
+       paddw     xmm1,xmm7
+       punpcklbw xmm2,xmm6             ; xmm2=(GHIJKLMN)
+       punpcklbw xmm3,xmm6             ; xmm3=(OPQRSTUV)
+       paddw     xmm2,xmm7
+       paddw     xmm3,xmm7
+
+       movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0        ; movdqa: workspace must be 16-byte aligned
+       movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
+       movdqa  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
+       movdqa  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+
+       add     esi, byte 4*SIZEOF_JSAMPROW             ; next group of 4 row pointers
+       add     edi, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; next 4 workspace rows
+       dec     ecx
+       jnz     short .convloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jpeg_quantize_int_sse2 (JCOEFPTR coef_block, DCTELEM * divisors,
+;                         DCTELEM * workspace);
+;
+
+%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
+%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
+%define SCALE(m,n,b)      XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; DCTELEM * divisors
+%define workspace      ebp+16          ; DCTELEM * workspace
+
+       align   16
+       global  EXTN(jpeg_quantize_int_sse2)
+
+EXTN(jpeg_quantize_int_sse2):  ; SSE2: quantize DCTSIZE2 coefficients via unsigned reciprocal and scale multiplies
+       push    ebp
+       mov     ebp,esp
+;      push    ebx             ; unused
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, POINTER [workspace]
+       mov     edx, POINTER [divisors]         ; base of the RECIPROCAL/CORRECTION/SCALE tables
+       mov     edi, JCOEFPTR [coef_block]
+       mov     eax, DCTSIZE2/32                ; loop count (32 coefficients per iteration)
+       alignx  16,7
+.quantloop:
+       movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+       movdqa  xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
+       movdqa  xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
+       movdqa  xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)]
+       movdqa  xmm0,xmm4
+       movdqa  xmm1,xmm5
+       movdqa  xmm2,xmm6
+       movdqa  xmm3,xmm7
+       psraw   xmm4,(WORD_BIT-1)       ; xmm4..xmm7 = per-word sign masks, kept until the end
+       psraw   xmm5,(WORD_BIT-1)
+       psraw   xmm6,(WORD_BIT-1)
+       psraw   xmm7,(WORD_BIT-1)
+       pxor    xmm0,xmm4
+       pxor    xmm1,xmm5
+       pxor    xmm2,xmm6
+       pxor    xmm3,xmm7
+       psubw   xmm0,xmm4               ; if (xmm0 < 0) xmm0 = -xmm0;
+       psubw   xmm1,xmm5               ; if (xmm1 < 0) xmm1 = -xmm1;
+       psubw   xmm2,xmm6               ; if (xmm2 < 0) xmm2 = -xmm2;
+       psubw   xmm3,xmm7               ; if (xmm3 < 0) xmm3 = -xmm3;
+
+       paddw   xmm0, XMMWORD [CORRECTION(0,0,edx)]  ; correction + roundfactor
+       paddw   xmm1, XMMWORD [CORRECTION(1,0,edx)]
+       paddw   xmm2, XMMWORD [CORRECTION(2,0,edx)]
+       paddw   xmm3, XMMWORD [CORRECTION(3,0,edx)]
+       psllw   xmm0,1
+       psllw   xmm1,1
+       psllw   xmm2,1
+       psllw   xmm3,1
+       pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)]  ; reciprocal
+       pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)]
+       pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)]
+       pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)]
+       psllw   xmm0,1
+       psllw   xmm1,1
+       psllw   xmm2,1
+       psllw   xmm3,1
+       pmulhuw xmm0, XMMWORD [SCALE(0,0,edx)]  ; scale
+       pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)]
+       pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)]
+       pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)]
+
+       pxor    xmm0,xmm4               ; reapply the sign of the original coefficients
+       pxor    xmm1,xmm5
+       pxor    xmm2,xmm6
+       pxor    xmm3,xmm7
+       psubw   xmm0,xmm4
+       psubw   xmm1,xmm5
+       psubw   xmm2,xmm6
+       psubw   xmm3,xmm7
+       movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0        ; NOTE(review): addressed with SIZEOF_DCTELEM but edi advances by SIZEOF_JCOEF - presumably equal; confirm
+       movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
+       movdqa  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
+       movdqa  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+
+       add     esi, byte 32*SIZEOF_DCTELEM
+       add     edx, byte 32*SIZEOF_DCTELEM
+       add     edi, byte 32*SIZEOF_JCOEF
+       dec     eax
+       jnz     near .quantloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+%endif ; !JFDCT_INT_QUANTIZE_WITH_DIVISION
+%endif ; JFDCT_INT_SSE2_SUPPORTED
diff --git a/jcqntsse.asm b/jcqntsse.asm
new file mode 100644 (file)
index 0000000..fe99a20
--- /dev/null
@@ -0,0 +1,218 @@
+;
+; jcqntsse.asm - sample data conversion and quantization (SSE & MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 12, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_SSE_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+; (two rows of 8 samples per loop pass, stored as packed single floats).
+; GLOBAL(void)
+; jpeg_convsamp_flt_sse (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                        FAST_FLOAT * workspace);
+; MMX does the byte unpacking/sign extension; SSE does the int->float step.
+
+%define sample_data    ebp+8           ; JSAMPARRAY sample_data
+%define start_col      ebp+12          ; JDIMENSION start_col
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+       align   16
+       global  EXTN(jpeg_convsamp_flt_sse)
+
+EXTN(jpeg_convsamp_flt_sse):
+       push    ebp
+       mov     ebp,esp                 ; arguments are addressed as [ebp+N]
+       push    ebx                     ; saved: used below as a row pointer
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       pcmpeqw  mm7,mm7                ; every word = 0xFFFF
+       psllw    mm7,7                  ; every word = 0xFF80 (-128)
+       packsswb mm7,mm7                ; mm7 = PB_CENTERJSAMPLE (0x808080..)
+
+       mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+       mov     eax, JDIMENSION [start_col]     ; column offset applied to each row
+       mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE/2                  ; pass counter: two rows per pass
+       alignx  16,7
+.convloop:
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+       mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+       movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; 8 bytes of the 1st row
+       movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; 8 bytes of the 2nd row
+
+       psubb   mm0,mm7                         ; mm0=(01234567), now signed
+       psubb   mm1,mm7                         ; mm1=(89ABCDEF), now signed
+
+       punpcklbw mm2,mm0                       ; mm2=(*0*1*2*3) '*'=stale dest byte
+       punpckhbw mm0,mm0                       ; mm0=(*4*5*6*7)
+       punpcklbw mm3,mm1                       ; mm3=(*8*9*A*B)
+       punpckhbw mm1,mm1                       ; mm1=(*C*D*E*F)
+
+       punpcklwd mm4,mm2                       ; mm4=(***0***1) sample in top byte
+       punpckhwd mm2,mm2                       ; mm2=(***2***3)
+       punpcklwd mm5,mm0                       ; mm5=(***4***5)
+       punpckhwd mm0,mm0                       ; mm0=(***6***7)
+
+       psrad     mm4,(DWORD_BIT-BYTE_BIT)      ; mm4=(01) arithmetic shift sign-extends
+       psrad     mm2,(DWORD_BIT-BYTE_BIT)      ; mm2=(23)
+       cvtpi2ps  xmm0,mm4                      ; xmm0=(01**) two int32 -> two floats
+       cvtpi2ps  xmm1,mm2                      ; xmm1=(23**)
+       psrad     mm5,(DWORD_BIT-BYTE_BIT)      ; mm5=(45)
+       psrad     mm0,(DWORD_BIT-BYTE_BIT)      ; mm0=(67)
+       cvtpi2ps  xmm2,mm5                      ; xmm2=(45**)
+       cvtpi2ps  xmm3,mm0                      ; xmm3=(67**)
+
+       punpcklwd mm6,mm3                       ; mm6=(***8***9)
+       punpckhwd mm3,mm3                       ; mm3=(***A***B)
+       punpcklwd mm4,mm1                       ; mm4=(***C***D)
+       punpckhwd mm1,mm1                       ; mm1=(***E***F)
+
+       psrad     mm6,(DWORD_BIT-BYTE_BIT)      ; mm6=(89)
+       psrad     mm3,(DWORD_BIT-BYTE_BIT)      ; mm3=(AB)
+       cvtpi2ps  xmm4,mm6                      ; xmm4=(89**)
+       cvtpi2ps  xmm5,mm3                      ; xmm5=(AB**)
+       psrad     mm4,(DWORD_BIT-BYTE_BIT)      ; mm4=(CD)
+       psrad     mm1,(DWORD_BIT-BYTE_BIT)      ; mm1=(EF)
+       cvtpi2ps  xmm6,mm4                      ; xmm6=(CD**)
+       cvtpi2ps  xmm7,mm1                      ; xmm7=(EF**)
+
+       movlhps   xmm0,xmm1                     ; xmm0=(0123)
+       movlhps   xmm2,xmm3                     ; xmm2=(4567)
+       movlhps   xmm4,xmm5                     ; xmm4=(89AB)
+       movlhps   xmm6,xmm7                     ; xmm6=(CDEF)
+
+       movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0     ; store (0123); movaps needs 16-byte alignment
+       movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2     ; store (4567)
+       movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4     ; store (89AB)
+       movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6     ; store (CDEF)
+
+       add     esi, byte 2*SIZEOF_JSAMPROW     ; step past the two row pointers used
+       add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; step past the two rows written
+       dec     ecx
+       jnz     near .convloop
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+; (workspace[i] * divisors[i], converted to a saturated 16-bit integer).
+; GLOBAL(void)
+; jpeg_quantize_flt_sse (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                        FAST_FLOAT * workspace);
+; SSE does the multiply and float->int step; MMX packs and stores the result.
+
+%define coef_block     ebp+8           ; JCOEFPTR coef_block
+%define divisors       ebp+12          ; FAST_FLOAT * divisors
+%define workspace      ebp+16          ; FAST_FLOAT * workspace
+
+       align   16
+       global  EXTN(jpeg_quantize_flt_sse)
+
+EXTN(jpeg_quantize_flt_sse):
+       push    ebp
+       mov     ebp,esp                 ; arguments are addressed as [ebp+N]
+;      push    ebx             ; unused
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     esi, POINTER [workspace]        ; input coefficients (floats)
+       mov     edx, POINTER [divisors]         ; multiplied in below; presumably holds reciprocals - confirm
+       mov     edi, JCOEFPTR [coef_block]      ; output coefficients
+       mov     eax, DCTSIZE2/16                ; pass counter: 16 coefficients per pass
+       alignx  16,7
+.quantloop:
+       movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]     ; four floats per register
+       movaps  xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+       mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]     ; scale by matching table entry
+       mulps   xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+       mulps   xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+       mulps   xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+       movhlps  xmm4,xmm0              ; upper two floats of xmm0 -> low half of xmm4
+       movhlps  xmm5,xmm1
+
+       cvtps2pi mm0,xmm0               ; low two floats -> two int32 (rounds per MXCSR mode)
+       cvtps2pi mm1,xmm1
+       cvtps2pi mm4,xmm4               ; same for the upper two floats
+       cvtps2pi mm5,xmm5
+
+       movhlps  xmm6,xmm2              ; repeat the split for the second group of eight
+       movhlps  xmm7,xmm3
+
+       cvtps2pi mm2,xmm2
+       cvtps2pi mm3,xmm3
+       cvtps2pi mm6,xmm6
+       cvtps2pi mm7,xmm7
+
+       packssdw mm0,mm4                ; 2+2 int32 -> 4 int16, signed saturation
+       packssdw mm1,mm5
+       packssdw mm2,mm6
+       packssdw mm3,mm7
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0     ; four coefficients per store
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
+
+       add     esi, byte 16*SIZEOF_FAST_FLOAT  ; advance all three pointers by 16 elements
+       add     edx, byte 16*SIZEOF_FAST_FLOAT
+       add     edi, byte 16*SIZEOF_JCOEF
+       dec     eax
+       jnz     short .quantloop
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+%endif ; JFDCT_FLT_SSE_MMX_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jcsammmx.asm b/jcsammmx.asm
new file mode 100644 (file)
index 0000000..95fc825
--- /dev/null
@@ -0,0 +1,328 @@
+;
+; jcsammmx.asm - downsampling (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 23, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%ifdef JCSAMPLE_MMX_SUPPORTED
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Downsample pixel values of a single component.
+; This version handles the common case of 2:1 horizontal and 1:1 vertical,
+; without smoothing.
+; Each output sample is (left + right + bias) >> 1, bias alternating 0,1.
+; GLOBAL(void)
+; jpeg_h2v1_downsample_mmx (j_compress_ptr cinfo,
+;                           jpeg_component_info * compptr,
+;                           JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Input rows are first padded on the right by repeating their last sample.
+
+%define cinfo(b)       (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)  (b)+16          ; JSAMPARRAY input_data
+%define output_data(b) (b)+20          ; JSAMPARRAY output_data
+
+       align   16
+       global  EXTN(jpeg_h2v1_downsample_mmx)
+
+EXTN(jpeg_h2v1_downsample_mmx):
+       push    ebp
+       mov     ebp,esp                 ; arguments are addressed via ebp
+;      push    ebx             ; unused
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     ecx, POINTER [compptr(ebp)]
+       mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]        ; compptr field, offset macro defined elsewhere
+       shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+       jz      near .return            ; nothing to do for zero columns
+
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+       ; -- expand_right_edge
+
+       push    ecx                             ; keep output_cols for the main loop
+       shl     ecx,1                           ; output_cols * 2
+       sub     ecx,edx                         ; ecx = number of padding columns
+       jle     short .expand_end               ; rows already wide enough
+
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; eax = rows to pad
+       test    eax,eax
+       jle     short .expand_end
+
+       cld                                     ; make stosb move upward
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       alignx  16,7
+.expandloop:
+       push    eax                             ; al is overwritten below
+       push    ecx                             ; rep stosb counts ecx down to 0
+
+       mov     edi, JSAMPROW [esi]
+       add     edi,edx                         ; edi = row + image_width
+       mov     al, JSAMPLE [edi-1]             ; last real sample of the row
+
+       rep stosb                               ; copy it into every padding column
+
+       pop     ecx
+       pop     eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+       dec     eax
+       jg      short .expandloop
+
+.expand_end:
+       pop     ecx                             ; output_cols
+
+       ; -- h2v1_downsample
+
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+       test    eax,eax
+       jle     short .return
+
+       mov       edx, 0x00010000       ; bias pattern
+       movd      mm7,edx
+       pcmpeqw   mm6,mm6               ; every word = 0xFFFF
+       punpckldq mm7,mm7               ; mm7={0, 1, 0, 1}
+       psrlw     mm6,BYTE_BIT          ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+       alignx  16,7
+.rowloop:
+       push    ecx                     ; the column loop consumes ecx, esi and edi
+       push    edi
+       push    esi
+
+       mov     esi, JSAMPROW [esi]             ; inptr
+       mov     edi, JSAMPROW [edi]             ; outptr
+       alignx  16,7
+.columnloop:
+
+       movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]       ; first quadword of input
+       movq    mm1, MMWORD [esi+1*SIZEOF_MMWORD]       ; second quadword of input
+       movq    mm2,mm0
+       movq    mm3,mm1
+
+       pand    mm0,mm6                 ; low byte of each word (even samples)
+       psrlw   mm2,BYTE_BIT            ; high byte of each word (odd samples)
+       pand    mm1,mm6
+       psrlw   mm3,BYTE_BIT
+
+       paddw   mm0,mm2                 ; sum of each horizontal pair
+       paddw   mm1,mm3
+       paddw   mm0,mm7                 ; add the alternating 0,1 bias
+       paddw   mm1,mm7
+       psrlw   mm0,1                   ; divide by 2
+       psrlw   mm1,1
+
+       packuswb mm0,mm1                ; eight words -> eight output bytes
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+       add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+       add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+       sub     ecx, byte SIZEOF_MMWORD         ; outcol
+       jnz     short .columnloop
+
+       pop     esi
+       pop     edi
+       pop     ecx
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_data
+       add     edi, byte SIZEOF_JSAMPROW       ; output_data
+       dec     eax                             ; rowctr
+       jg      short .rowloop
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+; without smoothing.
+; Each output sample is (sum of a 2x2 input group + bias) >> 2, bias 1,2,1,2.
+; GLOBAL(void)
+; jpeg_h2v2_downsample_mmx (j_compress_ptr cinfo,
+;                           jpeg_component_info * compptr,
+;                           JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Input rows are first padded on the right by repeating their last sample.
+
+%define cinfo(b)       (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)  (b)+16          ; JSAMPARRAY input_data
+%define output_data(b) (b)+20          ; JSAMPARRAY output_data
+
+       align   16
+       global  EXTN(jpeg_h2v2_downsample_mmx)
+
+EXTN(jpeg_h2v2_downsample_mmx):
+       push    ebp
+       mov     ebp,esp                 ; arguments are addressed via ebp
+;      push    ebx             ; unused
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     ecx, POINTER [compptr(ebp)]
+       mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]        ; compptr field, offset macro defined elsewhere
+       shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+       jz      near .return            ; nothing to do for zero columns
+
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+       ; -- expand_right_edge
+
+       push    ecx                             ; keep output_cols for the main loop
+       shl     ecx,1                           ; output_cols * 2
+       sub     ecx,edx                         ; ecx = number of padding columns
+       jle     short .expand_end               ; rows already wide enough
+
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; eax = rows to pad
+       test    eax,eax
+       jle     short .expand_end
+
+       cld                                     ; make stosb move upward
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       alignx  16,7
+.expandloop:
+       push    eax                             ; al is overwritten below
+       push    ecx                             ; rep stosb counts ecx down to 0
+
+       mov     edi, JSAMPROW [esi]
+       add     edi,edx                         ; edi = row + image_width
+       mov     al, JSAMPLE [edi-1]             ; last real sample of the row
+
+       rep stosb                               ; copy it into every padding column
+
+       pop     ecx
+       pop     eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+       dec     eax
+       jg      short .expandloop
+
+.expand_end:
+       pop     ecx                             ; output_cols
+
+       ; -- h2v2_downsample
+
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+       test    eax,eax
+       jle     near .return
+
+       mov       edx, 0x00020001       ; bias pattern
+       movd      mm7,edx
+       pcmpeqw   mm6,mm6               ; every word = 0xFFFF
+       punpckldq mm7,mm7               ; mm7={1, 2, 1, 2}
+       psrlw     mm6,BYTE_BIT          ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+       alignx  16,7
+.rowloop:
+       push    ecx                     ; the column loop consumes ecx, esi and edi
+       push    edi
+       push    esi
+
+       mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+       mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+       mov     edi, JSAMPROW [edi]                     ; outptr
+       alignx  16,7
+.columnloop:
+
+       movq    mm0, MMWORD [edx+0*SIZEOF_MMWORD]       ; upper row, first quadword
+       movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]       ; lower row, first quadword
+       movq    mm2, MMWORD [edx+1*SIZEOF_MMWORD]       ; upper row, second quadword
+       movq    mm3, MMWORD [esi+1*SIZEOF_MMWORD]       ; lower row, second quadword
+
+       movq    mm4,mm0
+       movq    mm5,mm1
+       pand    mm0,mm6                 ; low byte of each word (even samples)
+       psrlw   mm4,BYTE_BIT            ; high byte of each word (odd samples)
+       pand    mm1,mm6
+       psrlw   mm5,BYTE_BIT
+       paddw   mm0,mm4                 ; horizontal pair sums, upper row
+       paddw   mm1,mm5                 ; horizontal pair sums, lower row
+
+       movq    mm4,mm2                 ; same steps for the second quadwords
+       movq    mm5,mm3
+       pand    mm2,mm6
+       psrlw   mm4,BYTE_BIT
+       pand    mm3,mm6
+       psrlw   mm5,BYTE_BIT
+       paddw   mm2,mm4
+       paddw   mm3,mm5
+
+       paddw   mm0,mm1                 ; add the two rows: 2x2 group sums
+       paddw   mm2,mm3
+       paddw   mm0,mm7                 ; add the alternating 1,2 bias
+       paddw   mm2,mm7
+       psrlw   mm0,2                   ; divide by 4
+       psrlw   mm2,2
+
+       packuswb mm0,mm2                ; eight words -> eight output bytes
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+       add     edx, byte 2*SIZEOF_MMWORD       ; inptr0
+       add     esi, byte 2*SIZEOF_MMWORD       ; inptr1
+       add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+       sub     ecx, byte SIZEOF_MMWORD         ; outcol
+       jnz     near .columnloop
+
+       pop     esi
+       pop     edi
+       pop     ecx
+
+       add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+       dec     eax                             ; rowctr
+       jg      near .rowloop
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+%endif ; JCSAMPLE_MMX_SUPPORTED
index 212ec8757c4ca865eba34a80530d42e8d371dec7..9af7f15e29011807e50843116b8acac09484c18e 100644 (file)
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 5, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file contains downsampling routines.
  *
  * Downsampling input data is counted in "row groups".  A row group
@@ -48,6 +55,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jcolsamp.h"          /* Private declarations */
 
 
 /* Pointer to routine to downsample a single component */
@@ -467,6 +475,7 @@ jinit_downsampler (j_compress_ptr cinfo)
   int ci;
   jpeg_component_info * compptr;
   boolean smoothok = TRUE;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
 
   downsample = (my_downsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -494,7 +503,17 @@ jinit_downsampler (j_compress_ptr cinfo)
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
               compptr->v_samp_factor == cinfo->max_v_samp_factor) {
       smoothok = FALSE;
-      downsample->methods[ci] = h2v1_downsample;
+#ifdef JCSAMPLE_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2)
+       downsample->methods[ci] = jpeg_h2v1_downsample_sse2;
+      else
+#endif
+#ifdef JCSAMPLE_MMX_SUPPORTED
+      if (simd & JSIMD_MMX)
+       downsample->methods[ci] = jpeg_h2v1_downsample_mmx;
+      else
+#endif
+       downsample->methods[ci] = h2v1_downsample;
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
               compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
 #ifdef INPUT_SMOOTHING_SUPPORTED
@@ -502,6 +521,16 @@ jinit_downsampler (j_compress_ptr cinfo)
        downsample->methods[ci] = h2v2_smooth_downsample;
        downsample->pub.need_context_rows = TRUE;
       } else
+#endif
+#ifdef JCSAMPLE_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2)
+       downsample->methods[ci] = jpeg_h2v2_downsample_sse2;
+      else
+#endif
+#ifdef JCSAMPLE_MMX_SUPPORTED
+      if (simd & JSIMD_MMX)
+       downsample->methods[ci] = jpeg_h2v2_downsample_mmx;
+      else
 #endif
        downsample->methods[ci] = h2v2_downsample;
     } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
@@ -517,3 +546,25 @@ jinit_downsampler (j_compress_ptr cinfo)
     TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
 #endif
 }
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+GLOBAL(unsigned int)   /* JSIMD_* flag of the best downsampler code compiled in and reported available */
+jpeg_simd_downsampler (j_compress_ptr cinfo)
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);  /* tested bit by bit below */
+
+#ifdef JCSAMPLE_SSE2_SUPPORTED  /* SSE2 is checked first, so it wins when both are present */
+  if (simd & JSIMD_SSE2)
+    return JSIMD_SSE2;
+#endif
+#ifdef JCSAMPLE_MMX_SUPPORTED  /* MMX is the second choice */
+  if (simd & JSIMD_MMX)
+    return JSIMD_MMX;
+#endif
+
+  return JSIMD_NONE;  /* neither variant is compiled in or reported available */
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
diff --git a/jcsamss2.asm b/jcsamss2.asm
new file mode 100644 (file)
index 0000000..e187d63
--- /dev/null
@@ -0,0 +1,355 @@
+;
+; jcsamss2.asm - downsampling (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 23, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%ifdef JCSAMPLE_SSE2_SUPPORTED
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Downsample pixel values of a single component.
+; This version handles the common case of 2:1 horizontal and 1:1 vertical,
+; without smoothing.
+; Each output sample is (left + right + bias) >> 1, bias alternating 0,1.
+; GLOBAL(void)
+; jpeg_h2v1_downsample_sse2 (j_compress_ptr cinfo,
+;                            jpeg_component_info * compptr,
+;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Input rows are first padded on the right by repeating their last sample.
+
+%define cinfo(b)       (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)  (b)+16          ; JSAMPARRAY input_data
+%define output_data(b) (b)+20          ; JSAMPARRAY output_data
+
+       align   16
+       global  EXTN(jpeg_h2v1_downsample_sse2)
+
+EXTN(jpeg_h2v1_downsample_sse2):
+       push    ebp
+       mov     ebp,esp                 ; arguments are addressed via ebp
+;      push    ebx             ; unused
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     ecx, POINTER [compptr(ebp)]
+       mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]        ; compptr field, offset macro defined elsewhere
+       shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+       jz      near .return            ; nothing to do for zero columns
+
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+       ; -- expand_right_edge
+
+       push    ecx                             ; keep output_cols for the main loop
+       shl     ecx,1                           ; output_cols * 2
+       sub     ecx,edx                         ; ecx = number of padding columns
+       jle     short .expand_end               ; rows already wide enough
+
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; eax = rows to pad
+       test    eax,eax
+       jle     short .expand_end
+
+       cld                                     ; make stosb move upward
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       alignx  16,7
+.expandloop:
+       push    eax                             ; al is overwritten below
+       push    ecx                             ; rep stosb counts ecx down to 0
+
+       mov     edi, JSAMPROW [esi]
+       add     edi,edx                         ; edi = row + image_width
+       mov     al, JSAMPLE [edi-1]             ; last real sample of the row
+
+       rep stosb                               ; copy it into every padding column
+
+       pop     ecx
+       pop     eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+       dec     eax
+       jg      short .expandloop
+
+.expand_end:
+       pop     ecx                             ; output_cols
+
+       ; -- h2v1_downsample
+
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+       test    eax,eax
+       jle     near .return
+
+       mov     edx, 0x00010000         ; bias pattern
+       movd    xmm7,edx
+       pcmpeqw xmm6,xmm6               ; every word = 0xFFFF
+       pshufd  xmm7,xmm7,0x00          ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+       psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+       alignx  16,7
+.rowloop:
+       push    ecx                     ; the column loop consumes ecx, esi and edi
+       push    edi
+       push    esi
+
+       mov     esi, JSAMPROW [esi]             ; inptr
+       mov     edi, JSAMPROW [edi]             ; outptr
+
+       cmp     ecx, byte SIZEOF_XMMWORD        ; a full XMMWORD of output left?
+       jae     short .columnloop               ; yes: take the normal path
+       alignx  16,7
+
+.columnloop_r8:                         ; tail path: fewer than SIZEOF_XMMWORD columns left
+       movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; read only one input XMMWORD
+       pxor    xmm1,xmm1                       ; second input half is taken as zero
+       mov     ecx, SIZEOF_XMMWORD             ; so the sub below leaves 0 and the loop ends
+       jmp     short .downsample
+       alignx  16,7
+
+.columnloop:
+       movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; movdqa needs 16-byte aligned rows
+       movdqa  xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+.downsample:
+       movdqa  xmm2,xmm0
+       movdqa  xmm3,xmm1
+
+       pand    xmm0,xmm6               ; low byte of each word (even samples)
+       psrlw   xmm2,BYTE_BIT           ; high byte of each word (odd samples)
+       pand    xmm1,xmm6
+       psrlw   xmm3,BYTE_BIT
+
+       paddw   xmm0,xmm2               ; sum of each horizontal pair
+       paddw   xmm1,xmm3
+       paddw   xmm0,xmm7               ; add the alternating 0,1 bias
+       paddw   xmm1,xmm7
+       psrlw   xmm0,1                  ; divide by 2
+       psrlw   xmm1,1
+
+       packuswb xmm0,xmm1              ; sixteen words -> sixteen output bytes
+
+       movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0    ; NOTE(review): tail path also stores a full XMMWORD; presumably rows are padded - confirm
+
+       sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+       add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+       add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jae     short .columnloop               ; at least one full XMMWORD left
+       test    ecx,ecx
+       jnz     short .columnloop_r8            ; partial remainder: use the tail path
+
+       pop     esi
+       pop     edi
+       pop     ecx
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_data
+       add     edi, byte SIZEOF_JSAMPROW       ; output_data
+       dec     eax                             ; rowctr
+       jg      near .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+; without smoothing.
+; Each output sample is (sum of a 2x2 input group + bias) >> 2, bias 1,2,1,2.
+; GLOBAL(void)
+; jpeg_h2v2_downsample_sse2 (j_compress_ptr cinfo,
+;                            jpeg_component_info * compptr,
+;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Input rows are first padded on the right by repeating their last sample.
+
+%define cinfo(b)       (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)  (b)+16          ; JSAMPARRAY input_data
+%define output_data(b) (b)+20          ; JSAMPARRAY output_data
+
+       align   16
+       global  EXTN(jpeg_h2v2_downsample_sse2)
+
+EXTN(jpeg_h2v2_downsample_sse2):
+       push    ebp
+       mov     ebp,esp                 ; arguments are addressed via ebp
+;      push    ebx             ; unused
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     ecx, POINTER [compptr(ebp)]
+       mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]        ; compptr field, offset macro defined elsewhere
+       shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+       jz      near .return            ; nothing to do for zero columns
+
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+       ; -- expand_right_edge
+
+       push    ecx                             ; keep output_cols for the main loop
+       shl     ecx,1                           ; output_cols * 2
+       sub     ecx,edx                         ; ecx = number of padding columns
+       jle     short .expand_end               ; rows already wide enough
+
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; eax = rows to pad
+       test    eax,eax
+       jle     short .expand_end
+
+       cld                                     ; make stosb move upward
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       alignx  16,7
+.expandloop:
+       push    eax                             ; al is overwritten below
+       push    ecx                             ; rep stosb counts ecx down to 0
+
+       mov     edi, JSAMPROW [esi]
+       add     edi,edx                         ; edi = row + image_width
+       mov     al, JSAMPLE [edi-1]             ; last real sample of the row
+
+       rep stosb                               ; copy it into every padding column
+
+       pop     ecx
+       pop     eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+       dec     eax
+       jg      short .expandloop
+
+.expand_end:
+       pop     ecx                             ; output_cols
+
+       ; -- h2v2_downsample
+
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+       test    eax,eax
+       jle     near .return
+
+       mov     edx, 0x00020001         ; bias pattern
+       movd    xmm7,edx
+       pcmpeqw xmm6,xmm6               ; every word = 0xFFFF
+       pshufd  xmm7,xmm7,0x00          ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+       psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+       alignx  16,7
+.rowloop:
+       push    ecx                     ; the column loop consumes ecx, esi and edi
+       push    edi
+       push    esi
+
+       mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+       mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+       mov     edi, JSAMPROW [edi]                     ; outptr
+
+       cmp     ecx, byte SIZEOF_XMMWORD        ; a full XMMWORD of output left?
+       jae     short .columnloop               ; yes: take the normal path
+       alignx  16,7
+
+.columnloop_r8:                         ; tail path: fewer than SIZEOF_XMMWORD columns left
+       movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]    ; read only one XMMWORD per input row
+       movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+       pxor    xmm2,xmm2                       ; second input halves are taken as zero
+       pxor    xmm3,xmm3
+       mov     ecx, SIZEOF_XMMWORD             ; so the sub below leaves 0 and the loop ends
+       jmp     short .downsample
+       alignx  16,7
+
+.columnloop:
+       movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]    ; upper row; movdqa needs 16-byte aligned rows
+       movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; lower row
+       movdqa  xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]    ; upper row, next XMMWORD
+       movdqa  xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; lower row, next XMMWORD
+
+.downsample:
+       movdqa  xmm4,xmm0
+       movdqa  xmm5,xmm1
+       pand    xmm0,xmm6               ; low byte of each word (even samples)
+       psrlw   xmm4,BYTE_BIT           ; high byte of each word (odd samples)
+       pand    xmm1,xmm6
+       psrlw   xmm5,BYTE_BIT
+       paddw   xmm0,xmm4               ; horizontal pair sums, upper row
+       paddw   xmm1,xmm5               ; horizontal pair sums, lower row
+
+       movdqa  xmm4,xmm2               ; same steps for the second XMMWORDs
+       movdqa  xmm5,xmm3
+       pand    xmm2,xmm6
+       psrlw   xmm4,BYTE_BIT
+       pand    xmm3,xmm6
+       psrlw   xmm5,BYTE_BIT
+       paddw   xmm2,xmm4
+       paddw   xmm3,xmm5
+
+       paddw   xmm0,xmm1               ; add the two rows: 2x2 group sums
+       paddw   xmm2,xmm3
+       paddw   xmm0,xmm7               ; add the alternating 1,2 bias
+       paddw   xmm2,xmm7
+       psrlw   xmm0,2                  ; divide by 4
+       psrlw   xmm2,2
+
+       packuswb xmm0,xmm2              ; sixteen words -> sixteen output bytes
+
+       movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0    ; NOTE(review): tail path also stores a full XMMWORD; presumably rows are padded - confirm
+
+       sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+       add     edx, byte 2*SIZEOF_XMMWORD      ; inptr0
+       add     esi, byte 2*SIZEOF_XMMWORD      ; inptr1
+       add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jae     near .columnloop                ; at least one full XMMWORD left
+       test    ecx,ecx
+       jnz     near .columnloop_r8             ; partial remainder: use the tail path
+
+       pop     esi
+       pop     edi
+       pop     ecx
+
+       add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+       dec     eax                             ; rowctr
+       jg      near .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+%endif ; JCSAMPLE_SSE2_SUPPORTED
index 4938d20fcb655632d640534ba19e73f0c8fdccbc..1a515d392e4027aead04ab7af5ca7ea624dd2011 100644 (file)
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified to improve performance.
+ * Last Modified : December 18, 2005
+ * ---------------------------------------------------------------------
+ *
  * This file contains the coefficient buffer controller for decompression.
  * This controller is the top level of the JPEG decompressor proper.
  * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
@@ -133,6 +140,11 @@ start_output_pass (j_decompress_ptr cinfo)
 }
 
 
+#ifndef NEED_FAR_POINTERS
+#undef jzero_far
+#define jzero_far(target, bytestozero)  MEMZERO(target, bytestozero)
+#endif
+
 /*
  * Decompress and return some data in the single-pass case.
  * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
@@ -150,15 +162,61 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
   JDIMENSION MCU_col_num;      /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int blkn, ci, xindex, yindex, yoffset, useful_width;
+  int blkn, ci, ctr, xindex, yindex, yoffset;
   JSAMPARRAY output_ptr;
-  JDIMENSION start_col, output_col;
+  JDIMENSION output_col;
   jpeg_component_info *compptr;
   inverse_DCT_method_ptr inverse_DCT;
+  JSAMPARRAY output_ptr_blk[D_MAX_BLOCKS_IN_MCU];
+  JDIMENSION output_col_off[D_MAX_BLOCKS_IN_MCU];
+  jpeg_component_info *compptr_blk[D_MAX_BLOCKS_IN_MCU];
+  inverse_DCT_method_ptr inverse_DCT_blk_1[D_MAX_BLOCKS_IN_MCU];
+  inverse_DCT_method_ptr inverse_DCT_blk_2[D_MAX_BLOCKS_IN_MCU];
+  inverse_DCT_method_ptr *inverse_DCT_blk;
 
   /* Loop to process as much as one whole iMCU row */
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
+    /* Determine where data should go in output_buf and do the IDCT thing.
+     * We skip dummy blocks at the right and bottom edges (but blkn gets
+     * incremented past them!).  Note the inner loop relies on having
+     * allocated the MCU_buffer[] blocks sequentially.
+     */
+    blkn = 0;                  /* index of current DCT block within MCU */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Don't bother to IDCT an uninteresting component. */
+      if (! compptr->component_needed) {
+       for (ctr = compptr->MCU_blocks; ctr > 0; ctr--) {
+         inverse_DCT_blk_1[blkn] = inverse_DCT_blk_2[blkn] = NULL;
+         blkn++;
+       }
+       continue;
+      }
+      inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+      output_ptr = output_buf[compptr->component_index] +
+       yoffset * compptr->DCT_scaled_size;
+      for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+       if (cinfo->input_iMCU_row < last_iMCU_row ||
+           yoffset+yindex < compptr->last_row_height) {
+         for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+           compptr_blk[blkn] = compptr;
+           output_ptr_blk[blkn] = output_ptr;
+           output_col_off[blkn] = xindex * compptr->DCT_scaled_size;
+           inverse_DCT_blk_1[blkn] = inverse_DCT;
+           inverse_DCT_blk_2[blkn] = (xindex < compptr->last_col_width) ?
+                                     inverse_DCT : NULL;
+           blkn++;
+         }
+       } else {
+         for (ctr = compptr->MCU_width; ctr > 0; ctr--) {
+           inverse_DCT_blk_1[blkn] = inverse_DCT_blk_2[blkn] = NULL;
+           blkn++;
+         }
+       }
+       output_ptr += compptr->DCT_scaled_size;
+      }
+    }
     for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
         MCU_col_num++) {
       /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
@@ -170,39 +228,17 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
        coef->MCU_ctr = MCU_col_num;
        return JPEG_SUSPENDED;
       }
-      /* Determine where data should go in output_buf and do the IDCT thing.
-       * We skip dummy blocks at the right and bottom edges (but blkn gets
-       * incremented past them!).  Note the inner loop relies on having
-       * allocated the MCU_buffer[] blocks sequentially.
-       */
-      blkn = 0;                        /* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-       compptr = cinfo->cur_comp_info[ci];
-       /* Don't bother to IDCT an uninteresting component. */
-       if (! compptr->component_needed) {
-         blkn += compptr->MCU_blocks;
+      inverse_DCT_blk = (MCU_col_num < last_MCU_col) ? inverse_DCT_blk_1
+                                                    : inverse_DCT_blk_2;
+      for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+       inverse_DCT = inverse_DCT_blk[blkn];
+       if (inverse_DCT == NULL)
          continue;
-       }
-       inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
-       useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-                                                   : compptr->last_col_width;
-       output_ptr = output_buf[compptr->component_index] +
-         yoffset * compptr->DCT_scaled_size;
-       start_col = MCU_col_num * compptr->MCU_sample_width;
-       for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-         if (cinfo->input_iMCU_row < last_iMCU_row ||
-             yoffset+yindex < compptr->last_row_height) {
-           output_col = start_col;
-           for (xindex = 0; xindex < useful_width; xindex++) {
-             (*inverse_DCT) (cinfo, compptr,
-                             (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
-                             output_ptr, output_col);
-             output_col += compptr->DCT_scaled_size;
-           }
-         }
-         blkn += compptr->MCU_width;
-         output_ptr += compptr->DCT_scaled_size;
-       }
+       compptr = compptr_blk[blkn];
+       output_col = MCU_col_num * compptr->MCU_sample_width +
+                    output_col_off[blkn];
+       (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) coef->MCU_buffer[blkn],
+                       output_ptr_blk[blkn], output_col);
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -250,6 +286,8 @@ consume_data (j_decompress_ptr cinfo)
   JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
   JBLOCKROW buffer_ptr;
   jpeg_component_info *compptr;
+  int MCU_width[D_MAX_BLOCKS_IN_MCU];
+  JBLOCKROW MCU_buffer_base[D_MAX_BLOCKS_IN_MCU];
 
   /* Align the virtual buffers for the components used in this scan. */
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
@@ -267,19 +305,24 @@ consume_data (j_decompress_ptr cinfo)
   /* Loop to process one whole iMCU row */
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
+    /* Construct list of pointers to DCT blocks belonging to this MCU */
+    blkn = 0;                  /* index of current DCT block within MCU */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+       buffer_ptr = buffer[ci][yindex+yoffset];
+       for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+         MCU_width[blkn] = compptr->MCU_width;
+         MCU_buffer_base[blkn] = buffer_ptr++;
+         blkn++;
+       }
+      }
+    }
     for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
         MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;                        /* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-       compptr = cinfo->cur_comp_info[ci];
-       start_col = MCU_col_num * compptr->MCU_width;
-       for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-         buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-         for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-           coef->MCU_buffer[blkn++] = buffer_ptr++;
-         }
-       }
+      for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+       start_col = MCU_col_num * MCU_width[blkn];
+       coef->MCU_buffer[blkn] = MCU_buffer_base[blkn] + start_col;
       }
       /* Try to fetch the MCU. */
       if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
@@ -453,6 +496,15 @@ smoothing_ok (j_decompress_ptr cinfo)
 }
 
 
+/*
+ * SIMD Ext: Most SSE/SSE2 instructions require their memory operands to
+ * be aligned to a 16-byte boundary; otherwise a general-protection
+ * exception (#GP) is raised.
+ */
+
+#define ALIGN_SIZE     16              /* sizeof SSE/SSE2 register */
+#define ALIGN_MEM(p,a) ((void *) (((size_t) (p) + (a) - 1) & -(a)))
+
 /*
  * Variant of decompress_data for use when doing block smoothing.
  */
@@ -471,7 +523,8 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
   jpeg_component_info *compptr;
   inverse_DCT_method_ptr inverse_DCT;
   boolean first_row, last_row;
-  JBLOCK workspace;
+  JCOEF workspace[DCTSIZE2 + ALIGN_SIZE/sizeof(JCOEF)];
+  JCOEF * workptr = (JCOEF *) ALIGN_MEM(workspace, ALIGN_SIZE);
   int *coef_bits;
   JQUANT_TBL *quanttbl;
   INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
@@ -560,7 +613,7 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
       last_block_column = compptr->width_in_blocks - 1;
       for (block_num = 0; block_num <= last_block_column; block_num++) {
        /* Fetch current DCT block into workspace so we can modify it. */
-       jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+       jcopy_block_row(buffer_ptr, (JBLOCKROW) workptr, (JDIMENSION) 1);
        /* Update DC values */
        if (block_num < last_block_column) {
          DC3 = (int) prev_block_row[1][0];
@@ -572,7 +625,7 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
         * and is not known to be fully accurate.
         */
        /* AC01 */
-       if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+       if ((Al=coef_bits[1]) != 0 && workptr[1] == 0) {
          num = 36 * Q00 * (DC4 - DC6);
          if (num >= 0) {
            pred = (int) (((Q01<<7) + num) / (Q01<<8));
@@ -584,10 +637,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
              pred = (1<<Al)-1;
            pred = -pred;
          }
-         workspace[1] = (JCOEF) pred;
+         workptr[1] = (JCOEF) pred;
        }
        /* AC10 */
-       if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+       if ((Al=coef_bits[2]) != 0 && workptr[8] == 0) {
          num = 36 * Q00 * (DC2 - DC8);
          if (num >= 0) {
            pred = (int) (((Q10<<7) + num) / (Q10<<8));
@@ -599,10 +652,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
              pred = (1<<Al)-1;
            pred = -pred;
          }
-         workspace[8] = (JCOEF) pred;
+         workptr[8] = (JCOEF) pred;
        }
        /* AC20 */
-       if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+       if ((Al=coef_bits[3]) != 0 && workptr[16] == 0) {
          num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
          if (num >= 0) {
            pred = (int) (((Q20<<7) + num) / (Q20<<8));
@@ -614,10 +667,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
              pred = (1<<Al)-1;
            pred = -pred;
          }
-         workspace[16] = (JCOEF) pred;
+         workptr[16] = (JCOEF) pred;
        }
        /* AC11 */
-       if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+       if ((Al=coef_bits[4]) != 0 && workptr[9] == 0) {
          num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
          if (num >= 0) {
            pred = (int) (((Q11<<7) + num) / (Q11<<8));
@@ -629,10 +682,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
              pred = (1<<Al)-1;
            pred = -pred;
          }
-         workspace[9] = (JCOEF) pred;
+         workptr[9] = (JCOEF) pred;
        }
        /* AC02 */
-       if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+       if ((Al=coef_bits[5]) != 0 && workptr[2] == 0) {
          num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
          if (num >= 0) {
            pred = (int) (((Q02<<7) + num) / (Q02<<8));
@@ -644,10 +697,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
              pred = (1<<Al)-1;
            pred = -pred;
          }
-         workspace[2] = (JCOEF) pred;
+         workptr[2] = (JCOEF) pred;
        }
        /* OK, do the IDCT */
-       (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+       (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workptr,
                        output_ptr, output_col);
        /* Advance for next column */
        DC1 = DC2; DC2 = DC3;
diff --git a/jdcolmmx.asm b/jdcolmmx.asm
new file mode 100644 (file)
index 0000000..e46622c
--- /dev/null
@@ -0,0 +1,438 @@
+;
+; jdcolmmx.asm - colorspace conversion (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+%ifdef JDCOLOR_YCCRGB_MMX_SUPPORTED
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS      16
+
+F_0_344        equ      22554                  ; FIX(0.34414)
+F_0_714        equ      46802                  ; FIX(0.71414)
+F_1_402        equ      91881                  ; FIX(1.40200)
+F_1_772        equ     116130                  ; FIX(1.77200)
+F_0_402        equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285        equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228        equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_ycc_rgb_convert_mmx)
+
+EXTN(jconst_ycc_rgb_convert_mmx):
+
+PW_F0402       times 4 dw  F_0_402
+PW_MF0228      times 4 dw -F_0_228
+PW_MF0344_F0285        times 2 dw -F_0_344, F_0_285
+PW_ONE         times 4 dw  1
+PD_ONEHALF     times 2 dd  1 << (SCALEBITS-1)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jpeg_ycc_rgb_convert_mmx (j_decompress_ptr cinfo,
+;                           JSAMPIMAGE input_buf, JDIMENSION input_row,
+;                           JSAMPARRAY output_buf, int num_rows)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)   (b)+12          ; JSAMPIMAGE input_buf
+%define input_row(b)   (b)+16          ; JDIMENSION input_row
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define num_rows(b)    (b)+24          ; int num_rows
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         2
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_ycc_rgb_convert_mmx)
+
+EXTN(jpeg_ycc_rgb_convert_mmx):        ; YCbCr -> RGB conversion, 8 pixels per column-loop pass
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4                     ; reserve a slot for the saved frame pointer
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; [aligned ebp] = original ebp (see original_ebp)
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; allocate wk[WK_NUM] just below aligned ebp
+       pushpic eax             ; make room for the GOT address (gotptr slot)
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       mov     ecx, POINTER [cinfo(eax)]
+       mov     ecx, JDIMENSION [jdstruct_output_width(ecx)]    ; num_cols
+       test    ecx,ecx
+       jz      near .return            ; zero-width output: nothing to convert
+
+       push    ecx                     ; park num_cols while ecx holds input_row
+
+       mov     edi, JSAMPIMAGE [input_buf(eax)]
+       mov     ecx, JDIMENSION [input_row(eax)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]       ; esi = input_buf[0] (Y rows)
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]       ; ebx = input_buf[1] (Cb rows)
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]       ; edx = input_buf[2] (Cr rows)
+       lea     esi, [esi+ecx*SIZEOF_JSAMPROW]  ; advance each row-pointer array to input_row
+       lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+       lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
+
+       pop     ecx                     ; ecx = num_cols again
+
+       mov     edi, JSAMPARRAY [output_buf(eax)]
+       mov     eax, INT [num_rows(eax)]        ; eax = row counter (frame base in eax no longer needed)
+       test    eax,eax
+       jle     near .return            ; num_rows <= 0: nothing to convert
+       alignx  16,7
+.rowloop:
+       push    eax                     ; save row counter and row-array cursors;
+       push    edi                     ; they are restored in reverse order at .nextrow
+       push    edx
+       push    ebx
+       push    esi
+       push    ecx                     ; col
+
+       mov     esi, JSAMPROW [esi]     ; inptr0
+       mov     ebx, JSAMPROW [ebx]     ; inptr1
+       mov     edx, JSAMPROW [edx]     ; inptr2
+       mov     edi, JSAMPROW [edi]     ; outptr
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+       alignx  16,7
+.columnloop:
+
+       movq    mm5, MMWORD [ebx]       ; mm5=Cb(01234567)
+       movq    mm1, MMWORD [edx]       ; mm1=Cr(01234567)
+
+       pcmpeqw mm4,mm4                 ; mm4=all ones
+       pcmpeqw mm7,mm7                 ; mm7=all ones
+       psrlw   mm4,BYTE_BIT            ; mm4=low-byte mask of each word
+       psllw   mm7,7                   ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+       movq    mm0,mm4                 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}
+
+       pand    mm4,mm5                 ; mm4=Cb(0246)=CbE
+       psrlw   mm5,BYTE_BIT            ; mm5=Cb(1357)=CbO
+       pand    mm0,mm1                 ; mm0=Cr(0246)=CrE
+       psrlw   mm1,BYTE_BIT            ; mm1=Cr(1357)=CrO
+
+       paddw   mm4,mm7                 ; add 0xFF80 (= -128 as a signed word) to each chroma sample
+       paddw   mm5,mm7
+       paddw   mm0,mm7
+       paddw   mm1,mm7
+
+       ; (Original)
+       ; R = Y                + 1.40200 * Cr
+       ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+       ; B = Y + 1.77200 * Cb
+       ;
+       ; (This implementation)
+       ; R = Y                + 0.40200 * Cr + Cr
+       ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+       ; B = Y - 0.22800 * Cb + Cb + Cb
+
+       movq    mm2,mm4                 ; mm2=CbE
+       movq    mm3,mm5                 ; mm3=CbO
+       paddw   mm4,mm4                 ; mm4=2*CbE
+       paddw   mm5,mm5                 ; mm5=2*CbO
+       movq    mm6,mm0                 ; mm6=CrE
+       movq    mm7,mm1                 ; mm7=CrO
+       paddw   mm0,mm0                 ; mm0=2*CrE
+       paddw   mm1,mm1                 ; mm1=2*CrO
+
+       pmulhw  mm4,[GOTOFF(eax,PW_MF0228)]     ; mm4=(2*CbE * -FIX(0.22800))
+       pmulhw  mm5,[GOTOFF(eax,PW_MF0228)]     ; mm5=(2*CbO * -FIX(0.22800))
+       pmulhw  mm0,[GOTOFF(eax,PW_F0402)]      ; mm0=(2*CrE * FIX(0.40200))
+       pmulhw  mm1,[GOTOFF(eax,PW_F0402)]      ; mm1=(2*CrO * FIX(0.40200))
+
+       paddw   mm4,[GOTOFF(eax,PW_ONE)]        ; +1 so the following >>1 rounds
+       paddw   mm5,[GOTOFF(eax,PW_ONE)]
+       psraw   mm4,1                   ; mm4=(CbE * -FIX(0.22800))
+       psraw   mm5,1                   ; mm5=(CbO * -FIX(0.22800))
+       paddw   mm0,[GOTOFF(eax,PW_ONE)]        ; +1 so the following >>1 rounds
+       paddw   mm1,[GOTOFF(eax,PW_ONE)]
+       psraw   mm0,1                   ; mm0=(CrE * FIX(0.40200))
+       psraw   mm1,1                   ; mm1=(CrO * FIX(0.40200))
+
+       paddw   mm4,mm2                 ; first of the two "+ Cb" terms
+       paddw   mm5,mm3
+       paddw   mm4,mm2                 ; mm4=(CbE * FIX(1.77200))=(B-Y)E
+       paddw   mm5,mm3                 ; mm5=(CbO * FIX(1.77200))=(B-Y)O
+       paddw   mm0,mm6                 ; mm0=(CrE * FIX(1.40200))=(R-Y)E
+       paddw   mm1,mm7                 ; mm1=(CrO * FIX(1.40200))=(R-Y)O
+
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=(B-Y)E
+       movq    MMWORD [wk(1)], mm5     ; wk(1)=(B-Y)O
+
+       movq      mm4,mm2               ; mm4=CbE (copy for the high half)
+       movq      mm5,mm3               ; mm5=CbO (copy for the high half)
+       punpcklwd mm2,mm6               ; interleave Cb,Cr words so pmaddwd forms Cb*c0+Cr*c1
+       punpckhwd mm4,mm6
+       pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   mm4,[GOTOFF(eax,PW_MF0344_F0285)]
+       punpcklwd mm3,mm7
+       punpckhwd mm5,mm7
+       pmaddwd   mm3,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
+
+       paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]  ; add 1<<(SCALEBITS-1) to round before descaling
+       paddd     mm4,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     mm2,SCALEBITS
+       psrad     mm4,SCALEBITS
+       paddd     mm3,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     mm3,SCALEBITS
+       psrad     mm5,SCALEBITS
+
+       packssdw  mm2,mm4       ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+       packssdw  mm3,mm5       ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+       psubw     mm2,mm6       ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+       psubw     mm3,mm7       ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+
+       movq      mm5, MMWORD [esi]     ; mm5=Y(01234567)
+
+       pcmpeqw   mm4,mm4
+       psrlw     mm4,BYTE_BIT          ; mm4={0xFF 0x00 0xFF 0x00 ..}
+       pand      mm4,mm5               ; mm4=Y(0246)=YE
+       psrlw     mm5,BYTE_BIT          ; mm5=Y(1357)=YO
+
+       paddw     mm0,mm4               ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
+       paddw     mm1,mm5               ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
+       packuswb  mm0,mm0               ; mm0=(R0 R2 R4 R6 ** ** ** **)
+       packuswb  mm1,mm1               ; mm1=(R1 R3 R5 R7 ** ** ** **)
+
+       paddw     mm2,mm4               ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
+       paddw     mm3,mm5               ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
+       packuswb  mm2,mm2               ; mm2=(G0 G2 G4 G6 ** ** ** **)
+       packuswb  mm3,mm3               ; mm3=(G1 G3 G5 G7 ** ** ** **)
+
+       paddw     mm4, MMWORD [wk(0)]   ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
+       paddw     mm5, MMWORD [wk(1)]   ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
+       packuswb  mm4,mm4               ; mm4=(B0 B2 B4 B6 ** ** ** **)
+       packuswb  mm5,mm5               ; mm5=(B1 B3 B5 B7 ** ** ** **)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+       ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+       ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+       ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+       ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+
+       punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+       punpcklbw mmE,mmB               ; mmE=(20 01 22 03 24 05 26 07)
+       punpcklbw mmD,mmF               ; mmD=(11 21 13 23 15 25 17 27)
+
+       movq      mmG,mmA
+       movq      mmH,mmA
+       punpcklwd mmA,mmE               ; mmA=(00 10 20 01 02 12 22 03)
+       punpckhwd mmG,mmE               ; mmG=(04 14 24 05 06 16 26 07)
+
+       psrlq     mmH,2*BYTE_BIT        ; mmH=(02 12 04 14 06 16 -- --)
+       psrlq     mmE,2*BYTE_BIT        ; mmE=(22 03 24 05 26 07 -- --)
+
+       movq      mmC,mmD
+       movq      mmB,mmD
+       punpcklwd mmD,mmH               ; mmD=(11 21 02 12 13 23 04 14)
+       punpckhwd mmC,mmH               ; mmC=(15 25 06 16 17 27 -- --)
+
+       psrlq     mmB,2*BYTE_BIT        ; mmB=(13 23 15 25 17 27 -- --)
+
+       movq      mmF,mmE
+       punpcklwd mmE,mmB               ; mmE=(22 03 13 23 24 05 15 25)
+       punpckhwd mmF,mmB               ; mmF=(26 07 17 27 -- -- -- --)
+
+       punpckldq mmA,mmD               ; mmA=(00 10 20 01 11 21 02 12)
+       punpckldq mmE,mmG               ; mmE=(22 03 13 23 04 14 24 05)
+       punpckldq mmC,mmF               ; mmC=(15 25 06 16 26 07 17 27)
+
+       cmp     ecx, byte SIZEOF_MMWORD
+       jb      short .column_st16      ; fewer than SIZEOF_MMWORD pixels left: partial store
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+
+       sub     ecx, byte SIZEOF_MMWORD
+       jz      short .nextrow          ; row finished exactly on a full group
+
+       add     esi, byte SIZEOF_MMWORD                 ; inptr0
+       add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+       add     edx, byte SIZEOF_MMWORD                 ; inptr2
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st16:
+       lea     ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
+       cmp     ecx, byte 2*SIZEOF_MMWORD
+       jb      short .column_st8
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+       movq    mmA,mmC                 ; remaining bytes now come from mmC
+       sub     ecx, byte 2*SIZEOF_MMWORD
+       add     edi, byte 2*SIZEOF_MMWORD
+       jmp     short .column_st4
+.column_st8:
+       cmp     ecx, byte SIZEOF_MMWORD
+       jb      short .column_st4
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    mmA,mmE                 ; remaining bytes now come from mmE
+       sub     ecx, byte SIZEOF_MMWORD
+       add     edi, byte SIZEOF_MMWORD
+.column_st4:
+       movd    eax,mmA                 ; eax = low dword of mmA (no GOT-relative access follows on this path)
+       cmp     ecx, byte SIZEOF_DWORD
+       jb      short .column_st2
+       mov     DWORD [edi+0*SIZEOF_DWORD], eax
+       psrlq   mmA,DWORD_BIT
+       movd    eax,mmA                 ; eax = next dword of pending output bytes
+       sub     ecx, byte SIZEOF_DWORD
+       add     edi, byte SIZEOF_DWORD
+.column_st2:
+       cmp     ecx, byte SIZEOF_WORD
+       jb      short .column_st1
+       mov     WORD [edi+0*SIZEOF_WORD], ax
+       shr     eax,WORD_BIT
+       sub     ecx, byte SIZEOF_WORD
+       add     edi, byte SIZEOF_WORD
+.column_st1:
+       cmp     ecx, byte SIZEOF_BYTE
+       jb      short .nextrow
+       mov     BYTE [edi+0*SIZEOF_BYTE], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+       pcmpeqb   mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+       pcmpeqb   mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%else
+       pxor      mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+       pxor      mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%endif
+       ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+       ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+       ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+       ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+
+       punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+       punpcklbw mmE,mmG               ; mmE=(20 30 22 32 24 34 26 36)
+       punpcklbw mmB,mmD               ; mmB=(01 11 03 13 05 15 07 17)
+       punpcklbw mmF,mmH               ; mmF=(21 31 23 33 25 35 27 37)
+
+       movq      mmC,mmA
+       punpcklwd mmA,mmE               ; mmA=(00 10 20 30 02 12 22 32)
+       punpckhwd mmC,mmE               ; mmC=(04 14 24 34 06 16 26 36)
+       movq      mmG,mmB
+       punpcklwd mmB,mmF               ; mmB=(01 11 21 31 03 13 23 33)
+       punpckhwd mmG,mmF               ; mmG=(05 15 25 35 07 17 27 37)
+
+       movq      mmD,mmA
+       punpckldq mmA,mmB               ; mmA=(00 10 20 30 01 11 21 31)
+       punpckhdq mmD,mmB               ; mmD=(02 12 22 32 03 13 23 33)
+       movq      mmH,mmC
+       punpckldq mmC,mmG               ; mmC=(04 14 24 34 05 15 25 35)
+       punpckhdq mmH,mmG               ; mmH=(06 16 26 36 07 17 27 37)
+
+       cmp     ecx, byte SIZEOF_MMWORD
+       jb      short .column_st16      ; fewer than SIZEOF_MMWORD pixels left: partial store
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+       movq    MMWORD [edi+3*SIZEOF_MMWORD], mmH
+
+       sub     ecx, byte SIZEOF_MMWORD
+       jz      short .nextrow          ; row finished exactly on a full group
+
+       add     esi, byte SIZEOF_MMWORD                 ; inptr0
+       add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+       add     edx, byte SIZEOF_MMWORD                 ; inptr2
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st16:
+       cmp     ecx, byte SIZEOF_MMWORD/2       ; here ecx still counts pixels, not bytes
+       jb      short .column_st8
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+       movq    mmA,mmC                 ; remaining pixels now come from mmC/mmH
+       movq    mmD,mmH
+       sub     ecx, byte SIZEOF_MMWORD/2
+       add     edi, byte 2*SIZEOF_MMWORD
+.column_st8:
+       cmp     ecx, byte SIZEOF_MMWORD/4
+       jb      short .column_st4
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    mmA,mmD                 ; remaining pixel now comes from mmD
+       sub     ecx, byte SIZEOF_MMWORD/4
+       add     edi, byte 1*SIZEOF_MMWORD
+.column_st4:
+       cmp     ecx, byte SIZEOF_MMWORD/8
+       jb      short .nextrow
+       movd    DWORD [edi+0*SIZEOF_DWORD], mmA ; store the last single pixel
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+       alignx  16,7
+
+.nextrow:
+       pop     ecx                     ; restore num_cols, row-array cursors and row counter
+       pop     esi
+       pop     ebx
+       pop     edx
+       pop     edi
+       pop     eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; step each input row-pointer array to the next row
+       add     ebx, byte SIZEOF_JSAMPROW
+       add     edx, byte SIZEOF_JSAMPROW
+       add     edi, byte SIZEOF_JSAMPROW       ; output_buf
+       dec     eax                             ; num_rows
+       jg      near .rowloop
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JDCOLOR_YCCRGB_MMX_SUPPORTED
+%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
index 6c04dfe8aa1b36e4ab4c9909c77e9cd26b74e9d7..9a8c7ea0202dcddfd1ff30dad859d2f7aa198535 100644 (file)
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -5,12 +5,20 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 5, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file contains output colorspace conversion routines.
  */
 
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jcolsamp.h"          /* Private declarations */
 
 
 /* Private subobject */
@@ -105,6 +113,17 @@ build_ycc_rgb_table (j_decompress_ptr cinfo)
 }
 
 
+#if RGB_PIXELSIZE == 4
+/* offset of filler byte (assumes RGB_RED/GREEN/BLUE are distinct offsets in 0..3, so all four sum to 6) */
+#define RGB_FILLER  (6 - (RGB_RED) - (RGB_GREEN) - (RGB_BLUE))
+/* byte pattern to fill with */
+#ifdef RGBX_FILLER_0XFF
+#define RGB_FILLER_BYTE 0xFF
+#else
+#define RGB_FILLER_BYTE 0x00
+#endif
+#endif /* RGB_PIXELSIZE == 4 */
+
 /*
  * Convert some rows of samples to the output colorspace.
  *
@@ -151,6 +170,9 @@ ycc_rgb_convert (j_decompress_ptr cinfo,
                              ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                                 SCALEBITS))];
       outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+#if RGB_PIXELSIZE == 4
+      outptr[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
       outptr += RGB_PIXELSIZE;
     }
   }
@@ -228,6 +250,9 @@ gray_rgb_convert (j_decompress_ptr cinfo,
     for (col = 0; col < num_cols; col++) {
       /* We can dispense with GETJSAMPLE() here */
       outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+#if RGB_PIXELSIZE == 4
+      outptr[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
       outptr += RGB_PIXELSIZE;
     }
   }
@@ -305,6 +330,7 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
 {
   my_cconvert_ptr cconvert;
   int ci;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
 
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -358,8 +384,23 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
   case JCS_RGB:
     cinfo->out_color_components = RGB_PIXELSIZE;
     if (cinfo->jpeg_color_space == JCS_YCbCr) {
-      cconvert->pub.color_convert = ycc_rgb_convert;
-      build_ycc_rgb_table(cinfo);
+#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+#ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2 &&
+          IS_CONST_ALIGNED_16(jconst_ycc_rgb_convert_sse2)) {
+        cconvert->pub.color_convert = jpeg_ycc_rgb_convert_sse2;
+      } else
+#endif
+#ifdef JDCOLOR_YCCRGB_MMX_SUPPORTED
+      if (simd & JSIMD_MMX) {
+        cconvert->pub.color_convert = jpeg_ycc_rgb_convert_mmx;
+      } else
+#endif
+#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */
+      {
+        cconvert->pub.color_convert = ycc_rgb_convert;
+        build_ycc_rgb_table(cinfo);
+      }
     } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
       cconvert->pub.color_convert = gray_rgb_convert;
     } else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
@@ -394,3 +435,28 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
   else
     cinfo->output_components = cinfo->out_color_components;
 }
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+
+GLOBAL(unsigned int)   /* returns JSIMD_SSE2, JSIMD_MMX or JSIMD_NONE for the YCC->RGB converter */
+jpeg_simd_color_deconverter (j_decompress_ptr cinfo)   /* cinfo is only handed to jpeg_simd_support(); its color spaces are not examined here */
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);  /* tested below as a mask of JSIMD_* bits */
+
+#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+#ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED
+  if (simd & JSIMD_SSE2 &&     /* same tests, in the same order, as the YCbCr->RGB case of jinit_color_deconverter */
+      IS_CONST_ALIGNED_16(jconst_ycc_rgb_convert_sse2))  /* SSE2 also requires this check on its constant table to pass */
+    return JSIMD_SSE2;
+#endif
+#ifdef JDCOLOR_YCCRGB_MMX_SUPPORTED
+  if (simd & JSIMD_MMX)        /* reached only when the SSE2 test above did not return */
+    return JSIMD_MMX;
+#endif
+#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */
+
+  return JSIMD_NONE;           /* no SIMD variant compiled in, or none passed its test */
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
diff --git a/jdcolss2.asm b/jdcolss2.asm
new file mode 100644 (file)
index 0000000..fd6f04d
--- /dev/null
@@ -0,0 +1,536 @@
+;
+; jdcolss2.asm - colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+%ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS      16
+
+F_0_344        equ      22554                  ; FIX(0.34414)
+F_0_714        equ      46802                  ; FIX(0.71414)
+F_1_402        equ      91881                  ; FIX(1.40200)
+F_1_772        equ     116130                  ; FIX(1.77200)
+F_0_402        equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285        equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228        equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_ycc_rgb_convert_sse2)
+
+EXTN(jconst_ycc_rgb_convert_sse2):
+
+PW_F0402       times 8 dw  F_0_402
+PW_MF0228      times 8 dw -F_0_228
+PW_MF0344_F0285        times 4 dw -F_0_344, F_0_285
+PW_ONE         times 8 dw  1
+PD_ONEHALF     times 4 dd  1 << (SCALEBITS-1)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jpeg_ycc_rgb_convert_sse2 (j_decompress_ptr cinfo,
+;                            JSAMPIMAGE input_buf, JDIMENSION input_row,
+;                            JSAMPARRAY output_buf, int num_rows)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)   (b)+12          ; JSAMPIMAGE input_buf
+%define input_row(b)   (b)+16          ; JDIMENSION input_row
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define num_rows(b)    (b)+24          ; int num_rows
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         2
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_ycc_rgb_convert_sse2)
+
+EXTN(jpeg_ycc_rgb_convert_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = frame base ("original ebp"): [eax]=saved ebp, arguments start at eax+8
+       sub     esp, byte 4                     ; leave room for one dword before aligning
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax                       ; [original_ebp] = frame base; "pop esp" at .return reloads it
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve the WK_NUM xmmword work slots below ebp
+       pushpic eax             ; make room for the GOT address ([gotptr] slot); pushpic is a jsimdext.inc macro, presumably PIC-only
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       mov     ecx, POINTER [cinfo(eax)]
+       mov     ecx, JDIMENSION [jdstruct_output_width(ecx)]    ; num_cols
+       test    ecx,ecx
+       jz      near .return                    ; output_width == 0: nothing to convert
+
+       push    ecx                             ; park num_cols; ecx is reused for input_row below
+
+       mov     edi, JSAMPIMAGE [input_buf(eax)]
+       mov     ecx, JDIMENSION [input_row(eax)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]       ; input_buf[0] (loaded as Y in the column loop)
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]       ; input_buf[1] (loaded as Cb)
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]       ; input_buf[2] (loaded as Cr)
+       lea     esi, [esi+ecx*SIZEOF_JSAMPROW]                  ; &input_buf[0][input_row]
+       lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]                  ; &input_buf[1][input_row]
+       lea     edx, [edx+ecx*SIZEOF_JSAMPROW]                  ; &input_buf[2][input_row]
+
+       pop     ecx                             ; ecx = num_cols again
+
+       mov     edi, JSAMPARRAY [output_buf(eax)]
+       mov     eax, INT [num_rows(eax)]        ; eax becomes the row counter; the frame base is not needed after this
+       test    eax,eax
+       jle     near .return                    ; num_rows <= 0: nothing to convert
+       alignx  16,7
+.rowloop:
+       push    eax                     ; remaining num_rows
+       push    edi                     ; position in output_buf
+       push    edx                     ; position in input_buf[2]
+       push    ebx                     ; position in input_buf[1]
+       push    esi                     ; position in input_buf[0]
+       push    ecx                     ; col
+
+       mov     esi, JSAMPROW [esi]     ; inptr0
+       mov     ebx, JSAMPROW [ebx]     ; inptr1
+       mov     edx, JSAMPROW [edx]     ; inptr2
+       mov     edi, JSAMPROW [edi]     ; outptr
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+       alignx  16,7
+.columnloop:
+       ; Each pass loads 16 samples per plane with movdqa, so the input pointers must be 16-byte aligned; presumably rows are padded to a multiple of 16.
+       movdqa  xmm5, XMMWORD [ebx]     ; xmm5=Cb(0123456789ABCDEF)
+       movdqa  xmm1, XMMWORD [edx]     ; xmm1=Cr(0123456789ABCDEF)
+
+       pcmpeqw xmm4,xmm4               ; xmm4=(all 1's)
+       pcmpeqw xmm7,xmm7               ; xmm7=(all 1's)
+       psrlw   xmm4,BYTE_BIT           ; keep only the low byte of every word set
+       psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+       movdqa  xmm0,xmm4               ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
+
+       pand    xmm4,xmm5               ; xmm4=Cb(02468ACE)=CbE
+       psrlw   xmm5,BYTE_BIT           ; xmm5=Cb(13579BDF)=CbO
+       pand    xmm0,xmm1               ; xmm0=Cr(02468ACE)=CrE
+       psrlw   xmm1,BYTE_BIT           ; xmm1=Cr(13579BDF)=CrO
+
+       paddw   xmm4,xmm7               ; 0xFF80 is -128 as a signed word: CbE -= 128
+       paddw   xmm5,xmm7               ; CbO -= 128
+       paddw   xmm0,xmm7               ; CrE -= 128
+       paddw   xmm1,xmm7               ; CrO -= 128
+
+       ; (Original)
+       ; R = Y                + 1.40200 * Cr
+       ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+       ; B = Y + 1.77200 * Cb
+       ; FIX(0.71414), FIX(1.40200) and FIX(1.77200) exceed the signed 16-bit range of pmulhw/pmaddwd operands, so:
+       ; (This implementation)
+       ; R = Y                + 0.40200 * Cr + Cr
+       ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+       ; B = Y - 0.22800 * Cb + Cb + Cb
+
+       movdqa  xmm2,xmm4               ; xmm2=CbE
+       movdqa  xmm3,xmm5               ; xmm3=CbO
+       paddw   xmm4,xmm4               ; xmm4=2*CbE
+       paddw   xmm5,xmm5               ; xmm5=2*CbO
+       movdqa  xmm6,xmm0               ; xmm6=CrE
+       movdqa  xmm7,xmm1               ; xmm7=CrO
+       paddw   xmm0,xmm0               ; xmm0=2*CrE
+       paddw   xmm1,xmm1               ; xmm1=2*CrO
+
+       pmulhw  xmm4,[GOTOFF(eax,PW_MF0228)]    ; xmm4=(2*CbE * -FIX(0.22800))
+       pmulhw  xmm5,[GOTOFF(eax,PW_MF0228)]    ; xmm5=(2*CbO * -FIX(0.22800))
+       pmulhw  xmm0,[GOTOFF(eax,PW_F0402)]     ; xmm0=(2*CrE * FIX(0.40200))
+       pmulhw  xmm1,[GOTOFF(eax,PW_F0402)]     ; xmm1=(2*CrO * FIX(0.40200))
+
+       paddw   xmm4,[GOTOFF(eax,PW_ONE)]       ; +1 before the >>1 below: halve the doubled product with rounding
+       paddw   xmm5,[GOTOFF(eax,PW_ONE)]       ; (pmulhw keeps only the high word, so the doubling gave one extra bit)
+       psraw   xmm4,1                  ; xmm4=(CbE * -FIX(0.22800))
+       psraw   xmm5,1                  ; xmm5=(CbO * -FIX(0.22800))
+       paddw   xmm0,[GOTOFF(eax,PW_ONE)]       ; same rounding for the Cr products
+       paddw   xmm1,[GOTOFF(eax,PW_ONE)]
+       psraw   xmm0,1                  ; xmm0=(CrE * FIX(0.40200))
+       psraw   xmm1,1                  ; xmm1=(CrO * FIX(0.40200))
+
+       paddw   xmm4,xmm2               ; + CbE (first of the two "+ Cb" terms)
+       paddw   xmm5,xmm3               ; + CbO (first of the two "+ Cb" terms)
+       paddw   xmm4,xmm2               ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
+       paddw   xmm5,xmm3               ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
+       paddw   xmm0,xmm6               ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
+       paddw   xmm1,xmm7               ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
+
+       movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=(B-Y)E
+       movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(B-Y)O
+
+       movdqa    xmm4,xmm2
+       movdqa    xmm5,xmm3
+       punpcklwd xmm2,xmm6
+       punpckhwd xmm4,xmm6
+       pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   xmm4,[GOTOFF(eax,PW_MF0344_F0285)]
+       punpcklwd xmm3,xmm7
+       punpckhwd xmm5,xmm7
+       pmaddwd   xmm3,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
+
+       paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     xmm4,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     xmm2,SCALEBITS
+       psrad     xmm4,SCALEBITS
+       paddd     xmm3,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     xmm3,SCALEBITS
+       psrad     xmm5,SCALEBITS
+
+       packssdw  xmm2,xmm4     ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+       packssdw  xmm3,xmm5     ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+       psubw     xmm2,xmm6     ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+       psubw     xmm3,xmm7     ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+
+       movdqa    xmm5, XMMWORD [esi]   ; xmm5=Y(0123456789ABCDEF)
+
+       pcmpeqw   xmm4,xmm4
+       psrlw     xmm4,BYTE_BIT         ; xmm4={0xFF 0x00 0xFF 0x00 ..}
+       pand      xmm4,xmm5             ; xmm4=Y(02468ACE)=YE
+       psrlw     xmm5,BYTE_BIT         ; xmm5=Y(13579BDF)=YO
+
+       paddw     xmm0,xmm4             ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
+       paddw     xmm1,xmm5             ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
+       packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+       packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
+
+       paddw     xmm2,xmm4             ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
+       paddw     xmm3,xmm5             ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
+       packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+       packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
+
+       paddw     xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
+       paddw     xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
+       packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+       packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+       ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+       ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+       ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+       ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+       punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+       punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+       punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+       movdqa    xmmG,xmmA
+       movdqa    xmmH,xmmA
+       punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+       punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+       psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+       psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+       movdqa    xmmC,xmmD
+       movdqa    xmmB,xmmD
+       punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+       punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+       psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+       movdqa    xmmF,xmmE
+       punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+       punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+       pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+       movdqa    xmmB,xmmE
+       punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+       punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+       punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+       pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+       movdqa    xmmB,xmmF
+       punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+       punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+       punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+       punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+       punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+       punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      short .column_st32
+
+       test    edi, SIZEOF_XMMWORD-1
+       jnz     short .out1
+       ; --(aligned)-------------------
+       movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+       movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+       movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+       jmp     short .out0
+.out1: ; --(unaligned)-----------------
+       pcmpeqb    xmmH,xmmH                    ; xmmH=(all 1's)
+       maskmovdqu xmmA,xmmH                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmH                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmF,xmmH                    ; movntdqu XMMWORD [edi], xmmF
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+.out0:
+       sub     ecx, byte SIZEOF_XMMWORD
+       jz      near .nextrow
+
+       add     esi, byte SIZEOF_XMMWORD        ; inptr0
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+       add     edx, byte SIZEOF_XMMWORD        ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st32:                                  ; reached with fewer than 16 pixels left in the row (ecx < SIZEOF_XMMWORD)
+       pcmpeqb xmmH,xmmH                       ; xmmH=(all 1's)
+       lea     ecx, [ecx+ecx*2]                ; imul ecx, RGB_PIXELSIZE  (ecx = bytes still to store)
+       cmp     ecx, byte 2*SIZEOF_XMMWORD
+       jb      short .column_st16
+       maskmovdqu xmmA,xmmH                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmH                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmF                       ; the leftover bytes come from the third vector
+       sub     ecx, byte 2*SIZEOF_XMMWORD
+       jmp     short .column_st15
+.column_st16:
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      short .column_st15
+       maskmovdqu xmmA,xmmH                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmD                       ; the leftover bytes come from the second vector
+       sub     ecx, byte SIZEOF_XMMWORD
+.column_st15:                                  ; xmmA = data, ecx = n = leftover byte count (below 16)
+       mov     eax,ecx                         ; eax = n (the GOT address in eax is not used again in this row)
+       xor     ecx, byte 0x0F                  ; ecx = 15-n
+       shl     ecx, 2                          ; ecx = (15-n)*4, a shift count in bits
+       movd    xmmB,ecx
+       psrlq   xmmH,4                          ; each qword of xmmH: low 60 bits set
+       pcmpeqb xmmE,xmmE                       ; xmmE=(all 1's)
+       psrlq   xmmH,xmmB                       ; each qword of xmmH: low 4n bits set
+       psrlq   xmmE,xmmB                       ; each qword of xmmE: low 4n+4 bits set
+       punpcklbw xmmE,xmmH                     ; xmmE = store mask: bit 7 is set exactly in bytes 0..n-1
+       ; ---------------- if the n bytes fit inside one aligned 16-byte block, store at the aligned address instead
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1      ; ecx = misalignment of outptr in bytes
+       jz      short .adj0                     ; already aligned
+       add     eax,ecx                         ; eax = offset just past the last byte, relative to the aligned block
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0                     ; data crosses the block boundary: keep the unaligned address
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; ecx = misalignment in bits; what follows emulates pslldq xmmA,ecx & pslldq xmmE,ecx
+       movdqa  xmmG,xmmA                       ; keep unshifted copies for the < 8-byte case
+       movdqa  xmmC,xmmE
+       pslldq  xmmA, SIZEOF_XMMWORD/2          ; move the low qword into the high qword
+       pslldq  xmmE, SIZEOF_XMMWORD/2
+       movd    xmmD,ecx                        ; xmmD = shift in bits
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1                     ; shift is less than 64 bits
+       movd    xmmF,ecx                        ; shift >= 64 bits: apply the remaining (shift-64) bits
+       psllq   xmmA,xmmF
+       psllq   xmmE,xmmF
+       jmp     short .adj0
+.adj1: neg     ecx                             ; ecx = 64 - shift
+       movd    xmmF,ecx
+       psrlq   xmmA,xmmF                       ; bits carried from the low qword into the high qword
+       psrlq   xmmE,xmmF
+       psllq   xmmG,xmmD                       ; both qwords shifted left within themselves
+       psllq   xmmC,xmmD
+       por     xmmA,xmmG                       ; combined: data shifted left by the misalignment
+       por     xmmE,xmmC                       ; combined: mask shifted left by the misalignment
+.adj0: ; ----------------
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA  (only bytes selected by xmmE are written)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+       pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+       pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%else
+       pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+       pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%endif
+       ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+       ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+       ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+       ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+       punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+       punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+       punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+       punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+       movdqa    xmmC,xmmA
+       punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+       punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+       movdqa    xmmG,xmmB
+       punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+       punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+       movdqa    xmmD,xmmA
+       punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+       punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+       movdqa    xmmH,xmmC
+       punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+       punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      short .column_st32
+
+       test    edi, SIZEOF_XMMWORD-1
+       jnz     short .out1
+       ; --(aligned)-------------------
+       movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+       movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+       movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+       movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+       jmp     short .out0
+.out1: ; --(unaligned)-----------------
+       pcmpeqb    xmmE,xmmE                    ; xmmE=(all 1's)
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmE                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmC,xmmE                    ; movntdqu XMMWORD [edi], xmmC
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmH,xmmE                    ; movntdqu XMMWORD [edi], xmmH
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+.out0:
+       sub     ecx, byte SIZEOF_XMMWORD
+       jz      near .nextrow
+
+       add     esi, byte SIZEOF_XMMWORD        ; inptr0
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+       add     edx, byte SIZEOF_XMMWORD        ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st32:                                  ; reached with fewer than 16 pixels left in the row; ecx counts pixels here
+       pcmpeqb xmmE,xmmE                       ; xmmE=(all 1's)
+       cmp     ecx, byte SIZEOF_XMMWORD/2
+       jb      short .column_st16
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmE                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmC                       ; pixels 8..11 become the next vector to store
+       movdqa  xmmD,xmmH                       ; pixels 12..15 follow
+       sub     ecx, byte SIZEOF_XMMWORD/2      ; 8 pixels written
+.column_st16:
+       cmp     ecx, byte SIZEOF_XMMWORD/4
+       jb      short .column_st15
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmD
+       sub     ecx, byte SIZEOF_XMMWORD/4      ; 4 pixels written
+.column_st15:                                  ; xmmA = data, ecx = n = leftover pixel count (0..3)
+       cmp     ecx, byte SIZEOF_XMMWORD/16
+       jb      short .nextrow                  ; n == 0: row finished
+       mov     eax,ecx                         ; eax = n (the GOT address in eax is not used again in this row)
+       xor     ecx, byte 0x03                  ; ecx = 3-n
+       inc     ecx                             ; ecx = 4-n
+       shl     ecx, 4                          ; ecx = (4-n)*16, a shift count in bits
+       movd    xmmF,ecx
+       psrlq   xmmE,xmmF                       ; each qword of xmmE: low 16n bits set
+       punpcklbw xmmE,xmmE                     ; xmmE = store mask: bytes 0..4n-1 all 1's, the rest 0
+       ; ---------------- if the 4n bytes fit inside one aligned 16-byte block, store at the aligned address instead
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1      ; ecx = misalignment of outptr in bytes
+       jz      short .adj0                     ; already aligned
+       lea     eax, [ecx+eax*4]        ; RGB_PIXELSIZE  (eax = offset just past the last byte, relative to the aligned block)
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0                     ; data crosses the block boundary: keep the unaligned address
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; ecx = misalignment in bits; what follows emulates pslldq xmmA,ecx & pslldq xmmE,ecx
+       movdqa  xmmB,xmmA                       ; keep unshifted copies for the < 8-byte case
+       movdqa  xmmG,xmmE
+       pslldq  xmmA, SIZEOF_XMMWORD/2          ; move the low qword into the high qword
+       pslldq  xmmE, SIZEOF_XMMWORD/2
+       movd    xmmC,ecx                        ; xmmC = shift in bits
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1                     ; shift is less than 64 bits
+       movd    xmmH,ecx                        ; shift >= 64 bits: apply the remaining (shift-64) bits
+       psllq   xmmA,xmmH
+       psllq   xmmE,xmmH
+       jmp     short .adj0
+.adj1: neg     ecx                             ; ecx = 64 - shift
+       movd    xmmH,ecx
+       psrlq   xmmA,xmmH                       ; bits carried from the low qword into the high qword
+       psrlq   xmmE,xmmH
+       psllq   xmmB,xmmC                       ; both qwords shifted left within themselves
+       psllq   xmmG,xmmC
+       por     xmmA,xmmB                       ; combined: data shifted left by the misalignment
+       por     xmmE,xmmG                       ; combined: mask shifted left by the misalignment
+.adj0: ; ----------------
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA  (only bytes selected by xmmE are written)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+       alignx  16,7
+
+.nextrow:                                      ; restore what .rowloop pushed, in reverse order
+       pop     ecx                             ; num_cols
+       pop     esi                             ; position in input_buf[0]
+       pop     ebx                             ; position in input_buf[1]
+       pop     edx                             ; position in input_buf[2]
+       pop     edi                             ; position in output_buf
+       pop     eax                             ; remaining num_rows
+
+       add     esi, byte SIZEOF_JSAMPROW       ; advance each array position to its next row pointer
+       add     ebx, byte SIZEOF_JSAMPROW
+       add     edx, byte SIZEOF_JSAMPROW
+       add     edi, byte SIZEOF_JSAMPROW       ; output_buf
+       dec     eax                             ; num_rows
+       jg      near .rowloop                   ; loop while rows remain
+
+       sfence          ; flush the write buffer (the rows were written with movntdq/maskmovdqu non-temporal stores)
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp (the frame base stored at [original_ebp] in the prologue)
+       pop     ebp
+       ret
+
+%endif ; JDCOLOR_YCCRGB_SSE2_SUPPORTED
+%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
diff --git a/jdct.h b/jdct.h
index 04192a266ae148072feecb5feff6bca796c2b71a..678a3d10fa8dfb02cee774c35184a88cf417d6cc 100644 (file)
--- a/jdct.h
+++ b/jdct.h
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 5, 2006
+ * ---------------------------------------------------------------------
+ *
  * This include file contains common declarations for the forward and
  * inverse DCT modules.  These declarations are private to the DCT managers
  * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
  */
 
 
+/* SIMD Ext: configuration check */
+
+#if BITS_IN_JSAMPLE != 8
+#error "Sorry, this SIMD code only copes with 8-bit sample values."
+#endif
+
+
 /*
  * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
  * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
  * Quantization of the output coefficients is done by jcdctmgr.c.
  */
 
-#if BITS_IN_JSAMPLE == 8
-typedef int DCTELEM;           /* 16 or 32 bits is fine */
-#else
-typedef INT32 DCTELEM;         /* must have 32 bits */
-#endif
+/* SIMD Ext: To maximize parallelism, Type DCTELEM is changed to short
+ * (originally, int).
+ */
+typedef short DCTELEM;         /* SIMD Ext: must be short */
 
 typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
 typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
+typedef JMETHOD(void, convsamp_int_method_ptr,
+               (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace));
+typedef JMETHOD(void, convsamp_float_method_ptr,
+               (JSAMPARRAY sample_data, JDIMENSION start_col,
+                FAST_FLOAT *workspace));
+typedef JMETHOD(void, quantize_int_method_ptr,
+               (JCOEFPTR coef_block, DCTELEM * divisors,
+                DCTELEM * workspace));
+typedef JMETHOD(void, quantize_float_method_ptr,
+               (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                FAST_FLOAT * workspace));
 
 
 /*
@@ -49,19 +74,22 @@ typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
 
 /* typedef inverse_DCT_method_ptr is declared in jpegint.h */
 
+/* SIMD Ext: To maximize parallelism, Type MULTIPLIER is changed to short.
+ * Macro definitions of MULTIPLIER and FAST_FLOAT in jmorecfg.h are ignored.
+ */
+#undef MULTIPLIER
+#define MULTIPLIER  short      /* SIMD Ext: must be short */
+#undef FAST_FLOAT
+#define FAST_FLOAT  float      /* SIMD Ext: must be float */
+
 /*
  * Each IDCT routine has its own ideas about the best dct_table element type.
  */
 
-typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
-#if BITS_IN_JSAMPLE == 8
-typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
+typedef MULTIPLIER ISLOW_MULT_TYPE;    /* SIMD Ext: must be short */
+typedef MULTIPLIER IFAST_MULT_TYPE;    /* SIMD Ext: must be short */
 #define IFAST_SCALE_BITS  2    /* fractional bits in scale factors */
-#else
-typedef INT32 IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */
-#define IFAST_SCALE_BITS  13   /* fractional bits in scale factors */
-#endif
-typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
+typedef FAST_FLOAT FLOAT_MULT_TYPE;    /* SIMD Ext: must be float */
 
 
 /*
@@ -81,15 +109,64 @@ typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_fdct_islow                jFDislow
-#define jpeg_fdct_ifast                jFDifast
-#define jpeg_fdct_float                jFDfloat
-#define jpeg_idct_islow                jRDislow
-#define jpeg_idct_ifast                jRDifast
-#define jpeg_idct_float                jRDfloat
-#define jpeg_idct_4x4          jRD4x4
-#define jpeg_idct_2x2          jRD2x2
-#define jpeg_idct_1x1          jRD1x1
+#define jpeg_fdct_islow                jFDislow                /* jfdctint.asm */
+#define jpeg_fdct_ifast                jFDifast                /* jfdctfst.asm */
+#define jpeg_fdct_float                jFDfloat                /* jfdctflt.asm */
+#define jpeg_fdct_islow_mmx    jFDMislow               /* jfmmxint.asm */
+#define jpeg_fdct_ifast_mmx    jFDMifast               /* jfmmxfst.asm */
+#define jpeg_fdct_float_3dnow  jFD3float               /* jf3dnflt.asm */
+#define jpeg_fdct_islow_sse2   jFDSislow               /* jfss2int.asm */
+#define jpeg_fdct_ifast_sse2   jFDSifast               /* jfss2fst.asm */
+#define jpeg_fdct_float_sse    jFDSfloat               /* jfsseflt.asm */
+#define jpeg_convsamp_int      jCnvInt                 /* jcqntint.asm */
+#define jpeg_quantize_int      jQntInt                 /* jcqntint.asm */
+#define jpeg_quantize_idiv     jQntIDiv                /* jcqntint.asm */
+#define jpeg_convsamp_float    jCnvFloat               /* jcqntflt.asm */
+#define jpeg_quantize_float    jQntFloat               /* jcqntflt.asm */
+#define jpeg_convsamp_int_mmx  jCnvMmx                 /* jcqntmmx.asm */
+#define jpeg_quantize_int_mmx  jQntMmx                 /* jcqntmmx.asm */
+#define jpeg_convsamp_flt_3dnow        jCnv3dnow               /* jcqnt3dn.asm */
+#define jpeg_quantize_flt_3dnow        jQnt3dnow               /* jcqnt3dn.asm */
+#define jpeg_convsamp_int_sse2 jCnvISse2               /* jcqnts2i.asm */
+#define jpeg_quantize_int_sse2 jQntISse2               /* jcqnts2i.asm */
+#define jpeg_convsamp_flt_sse  jCnvSse                 /* jcqntsse.asm */
+#define jpeg_quantize_flt_sse  jQntSse                 /* jcqntsse.asm */
+#define jpeg_convsamp_flt_sse2 jCnvFSse2               /* jcqnts2f.asm */
+#define jpeg_quantize_flt_sse2 jQntFSse2               /* jcqnts2f.asm */
+#define jpeg_idct_islow                jRDislow                /* jidctint.asm */
+#define jpeg_idct_ifast                jRDifast                /* jidctfst.asm */
+#define jpeg_idct_float                jRDfloat                /* jidctflt.asm */
+#define jpeg_idct_4x4          jRD4x4                  /* jidctred.asm */
+#define jpeg_idct_2x2          jRD2x2                  /* jidctred.asm */
+#define jpeg_idct_1x1          jRD1x1                  /* jidctred.asm */
+#define jpeg_idct_islow_mmx    jRDMislow               /* jimmxint.asm */
+#define jpeg_idct_ifast_mmx    jRDMifast               /* jimmxfst.asm */
+#define jpeg_idct_float_3dnow  jRD3float               /* ji3dnflt.asm */
+#define jpeg_idct_4x4_mmx      jRDM4x4                 /* jimmxred.asm */
+#define jpeg_idct_2x2_mmx      jRDM2x2                 /* jimmxred.asm */
+#define jpeg_idct_islow_sse2   jRDSislow               /* jiss2int.asm */
+#define jpeg_idct_ifast_sse2   jRDSifast               /* jiss2fst.asm */
+#define jpeg_idct_float_sse    jRDSfloat               /* jisseflt.asm */
+#define jpeg_idct_float_sse2   jRD2float               /* jiss2flt.asm */
+#define jpeg_idct_4x4_sse2     jRDS4x4                 /* jiss2red.asm */
+#define jpeg_idct_2x2_sse2     jRDS2x2                 /* jiss2red.asm */
+#define jconst_fdct_float      jFCfloat                /* jfdctflt.asm */
+#define jconst_fdct_islow_mmx  jFCMislow               /* jfmmxint.asm */
+#define jconst_fdct_ifast_mmx  jFCMifast               /* jfmmxfst.asm */
+#define jconst_fdct_float_3dnow        jFC3float               /* jf3dnflt.asm */
+#define jconst_fdct_islow_sse2 jFCSislow               /* jfss2int.asm */
+#define jconst_fdct_ifast_sse2 jFCSifast               /* jfss2fst.asm */
+#define jconst_fdct_float_sse  jFCSfloat               /* jfsseflt.asm */
+#define jconst_idct_float      jRCfloat                /* jidctflt.asm */
+#define jconst_idct_islow_mmx  jRCMislow               /* jimmxint.asm */
+#define jconst_idct_ifast_mmx  jRCMifast               /* jimmxfst.asm */
+#define jconst_idct_float_3dnow        jRC3float               /* ji3dnflt.asm */
+#define jconst_idct_red_mmx    jRCMred                 /* jimmxred.asm */
+#define jconst_idct_islow_sse2 jRCSislow               /* jiss2int.asm */
+#define jconst_idct_ifast_sse2 jRCSifast               /* jiss2fst.asm */
+#define jconst_idct_float_sse  jRCSfloat               /* jisseflt.asm */
+#define jconst_idct_float_sse2 jRC2float               /* jiss2flt.asm */
+#define jconst_idct_red_sse2   jRCSred                 /* jiss2red.asm */
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 /* Extern declarations for the forward and inverse DCT routines. */
@@ -98,6 +175,47 @@ EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data));
 EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data));
 EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data));
 
+EXTERN(void) jpeg_fdct_islow_mmx JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_ifast_mmx JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_float_3dnow JPP((FAST_FLOAT * data));
+
+EXTERN(void) jpeg_fdct_islow_sse2 JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_ifast_sse2 JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_float_sse JPP((FAST_FLOAT * data));
+
+EXTERN(void) jpeg_convsamp_int
+    JPP((JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace));
+EXTERN(void) jpeg_quantize_int
+    JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace));
+EXTERN(void) jpeg_quantize_idiv
+    JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace));
+EXTERN(void) jpeg_convsamp_float
+    JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace));
+EXTERN(void) jpeg_quantize_float
+    JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace));
+
+EXTERN(void) jpeg_convsamp_int_mmx
+    JPP((JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace));
+EXTERN(void) jpeg_quantize_int_mmx
+    JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace));
+EXTERN(void) jpeg_convsamp_flt_3dnow
+    JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace));
+EXTERN(void) jpeg_quantize_flt_3dnow
+    JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace));
+
+EXTERN(void) jpeg_convsamp_int_sse2
+    JPP((JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace));
+EXTERN(void) jpeg_quantize_int_sse2
+    JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace));
+EXTERN(void) jpeg_convsamp_flt_sse
+    JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace));
+EXTERN(void) jpeg_quantize_flt_sse
+    JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace));
+EXTERN(void) jpeg_convsamp_flt_sse2
+    JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace));
+EXTERN(void) jpeg_quantize_flt_sse2
+    JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace));
+
 EXTERN(void) jpeg_idct_islow
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
@@ -117,6 +235,60 @@ EXTERN(void) jpeg_idct_1x1
     JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
 
+EXTERN(void) jpeg_idct_islow_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2_mmx
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+EXTERN(void) jpeg_idct_float_3dnow
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float_sse
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+EXTERN(void) jpeg_idct_islow_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2_sse2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+        JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+extern const int jconst_fdct_float[];          /* jfdctflt.asm */
+extern const int jconst_fdct_islow_mmx[];      /* jfmmxint.asm */
+extern const int jconst_fdct_ifast_mmx[];      /* jfmmxfst.asm */
+extern const int jconst_fdct_float_3dnow[];    /* jf3dnflt.asm */
+extern const int jconst_fdct_islow_sse2[];     /* jfss2int.asm */
+extern const int jconst_fdct_ifast_sse2[];     /* jfss2fst.asm */
+extern const int jconst_fdct_float_sse[];      /* jfsseflt.asm */
+extern const int jconst_idct_float[];          /* jidctflt.asm */
+extern const int jconst_idct_islow_mmx[];      /* jimmxint.asm */
+extern const int jconst_idct_ifast_mmx[];      /* jimmxfst.asm */
+extern const int jconst_idct_float_3dnow[];    /* ji3dnflt.asm */
+extern const int jconst_idct_red_mmx[];        /* jimmxred.asm */
+extern const int jconst_idct_islow_sse2[];     /* jiss2int.asm */
+extern const int jconst_idct_ifast_sse2[];     /* jiss2fst.asm */
+extern const int jconst_idct_float_sse[];      /* jisseflt.asm */
+extern const int jconst_idct_float_sse2[];     /* jiss2flt.asm */
+extern const int jconst_idct_red_sse2[];       /* jiss2red.asm */
+
 
 /*
  * Macros for handling fixed-point arithmetic; these are used by many
diff --git a/jdct.inc b/jdct.inc
new file mode 100644 (file)
index 0000000..a6fb0ed
--- /dev/null
+++ b/jdct.inc
@@ -0,0 +1,125 @@
+;
+; jdct.inc - private declarations for forward & reverse DCT subsystems
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; Last Modified : January 5, 2006
+;
+; [TAB8]
+
+; ---- jdct.h --------------------------------------------------------------
+;
+; configuration check: BITS_IN_JSAMPLE==8 (8-bit sample values) is the only
+; valid setting on this SIMD extension.
+;
+%if BITS_IN_JSAMPLE != 8
+%error "Sorry, this SIMD code only copes with 8-bit sample values."
+%endif
+
+; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
+; the DCT is to be performed in-place in that buffer.
+; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
+;
+%define DCTELEM                        word            ; short
+%define SIZEOF_DCTELEM         SIZEOF_WORD     ; sizeof(DCTELEM)
+
+; To maximize parallelism, Type MULTIPLIER is changed to short.
+;
+%define MULTIPLIER             word            ; short
+%define SIZEOF_MULTIPLIER      SIZEOF_WORD     ; sizeof(MULTIPLIER)
+%define FAST_FLOAT             FP32            ; float
+%define SIZEOF_FAST_FLOAT      SIZEOF_FP32     ; sizeof(FAST_FLOAT)
+
+; Each IDCT routine has its own ideas about the best dct_table element type.
+;
+%define ISLOW_MULT_TYPE        MULTIPLIER          ; must be short
+%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_MULTIPLIER   ; sizeof(ISLOW_MULT_TYPE)
+%define IFAST_MULT_TYPE        MULTIPLIER          ; must be short
+%define SIZEOF_IFAST_MULT_TYPE SIZEOF_MULTIPLIER   ; sizeof(IFAST_MULT_TYPE)
+%define IFAST_SCALE_BITS       2       ; fractional bits in scale factors
+%define FLOAT_MULT_TYPE        FAST_FLOAT          ; must be float
+%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FAST_FLOAT   ; sizeof(FLOAT_MULT_TYPE)
+
+; Each IDCT routine is responsible for range-limiting its results and
+; converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+; be quite far out of range if the input data is corrupt, so a bulletproof
+; range-limiting step is required.  We use a mask-and-table-lookup method
+; to do the combined operations quickly.
+;
+%define RANGE_MASK  (MAXJSAMPLE * 4 + 3)  ; 2 bits wider than legal samples
+
+; Short forms of external names for systems with brain-damaged linkers.
+;
+%ifdef NEED_SHORT_EXTERNAL_NAMES
+%define jpeg_fdct_islow                jFDislow        ; jfdctint.asm
+%define jpeg_fdct_ifast                jFDifast        ; jfdctfst.asm
+%define jpeg_fdct_float                jFDfloat        ; jfdctflt.asm
+%define jpeg_fdct_islow_mmx    jFDMislow       ; jfmmxint.asm
+%define jpeg_fdct_ifast_mmx    jFDMifast       ; jfmmxfst.asm
+%define jpeg_fdct_float_3dnow  jFD3float       ; jf3dnflt.asm
+%define jpeg_fdct_islow_sse2   jFDSislow       ; jfss2int.asm
+%define jpeg_fdct_ifast_sse2   jFDSifast       ; jfss2fst.asm
+%define jpeg_fdct_float_sse    jFDSfloat       ; jfsseflt.asm
+%define jpeg_convsamp_int      jCnvInt         ; jcqntint.asm
+%define jpeg_quantize_int      jQntInt         ; jcqntint.asm
+%define jpeg_quantize_idiv     jQntIDiv        ; jcqntint.asm
+%define jpeg_convsamp_float    jCnvFloat       ; jcqntflt.asm
+%define jpeg_quantize_float    jQntFloat       ; jcqntflt.asm
+%define jpeg_convsamp_int_mmx  jCnvMmx         ; jcqntmmx.asm
+%define jpeg_quantize_int_mmx  jQntMmx         ; jcqntmmx.asm
+%define jpeg_convsamp_flt_3dnow        jCnv3dnow       ; jcqnt3dn.asm
+%define jpeg_quantize_flt_3dnow        jQnt3dnow       ; jcqnt3dn.asm
+%define jpeg_convsamp_int_sse2 jCnvISse2       ; jcqnts2i.asm
+%define jpeg_quantize_int_sse2 jQntISse2       ; jcqnts2i.asm
+%define jpeg_convsamp_flt_sse  jCnvSse         ; jcqntsse.asm
+%define jpeg_quantize_flt_sse  jQntSse         ; jcqntsse.asm
+%define jpeg_convsamp_flt_sse2 jCnvFSse2       ; jcqnts2f.asm
+%define jpeg_quantize_flt_sse2 jQntFSse2       ; jcqnts2f.asm
+%define jpeg_idct_islow                jRDislow        ; jidctint.asm
+%define jpeg_idct_ifast                jRDifast        ; jidctfst.asm
+%define jpeg_idct_float                jRDfloat        ; jidctflt.asm
+%define jpeg_idct_4x4          jRD4x4          ; jidctred.asm
+%define jpeg_idct_2x2          jRD2x2          ; jidctred.asm
+%define jpeg_idct_1x1          jRD1x1          ; jidctred.asm
+%define jpeg_idct_islow_mmx    jRDMislow       ; jimmxint.asm
+%define jpeg_idct_ifast_mmx    jRDMifast       ; jimmxfst.asm
+%define jpeg_idct_float_3dnow  jRD3float       ; ji3dnflt.asm
+%define jpeg_idct_4x4_mmx      jRDM4x4         ; jimmxred.asm
+%define jpeg_idct_2x2_mmx      jRDM2x2         ; jimmxred.asm
+%define jpeg_idct_islow_sse2   jRDSislow       ; jiss2int.asm
+%define jpeg_idct_ifast_sse2   jRDSifast       ; jiss2fst.asm
+%define jpeg_idct_float_sse    jRDSfloat       ; jisseflt.asm
+%define jpeg_idct_float_sse2   jRD2float       ; jiss2flt.asm
+%define jpeg_idct_4x4_sse2     jRDS4x4         ; jiss2red.asm
+%define jpeg_idct_2x2_sse2     jRDS2x2         ; jiss2red.asm
+%define jconst_fdct_float      jFCfloat        ; jfdctflt.asm
+%define jconst_fdct_islow_mmx  jFCMislow       ; jfmmxint.asm
+%define jconst_fdct_ifast_mmx  jFCMifast       ; jfmmxfst.asm
+%define jconst_fdct_float_3dnow        jFC3float       ; jf3dnflt.asm
+%define jconst_fdct_islow_sse2 jFCSislow       ; jfss2int.asm
+%define jconst_fdct_ifast_sse2 jFCSifast       ; jfss2fst.asm
+%define jconst_fdct_float_sse  jFCSfloat       ; jfsseflt.asm
+%define jconst_idct_float      jRCfloat        ; jidctflt.asm
+%define jconst_idct_islow_mmx  jRCMislow       ; jimmxint.asm
+%define jconst_idct_ifast_mmx  jRCMifast       ; jimmxfst.asm
+%define jconst_idct_float_3dnow        jRC3float       ; ji3dnflt.asm
+%define jconst_idct_red_mmx    jRCMred         ; jimmxred.asm
+%define jconst_idct_islow_sse2 jRCSislow       ; jiss2int.asm
+%define jconst_idct_ifast_sse2 jRCSifast       ; jiss2fst.asm
+%define jconst_idct_float_sse  jRCSfloat       ; jisseflt.asm
+%define jconst_idct_float_sse2 jRC2float       ; jiss2flt.asm
+%define jconst_idct_red_sse2   jRCSred         ; jiss2red.asm
+%endif ; NEED_SHORT_EXTERNAL_NAMES
+
+; --------------------------------------------------------------------------
+
+%define ROW(n,b,s)             ((b)+(n)*(s))          ; base b + n steps of s
+%define COL(n,b,s)             ((b)+(n)*(s)*DCTSIZE)  ; base b + n steps of s*DCTSIZE
+; xBLOCK(m,n,b,s): base b + m steps of DCTSIZE*s, then n DWORD/MMWORD/XMMWORD units
+%define DWBLOCK(m,n,b,s)       ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
+%define MMBLOCK(m,n,b,s)       ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
+%define XMMBLOCK(m,n,b,s)      ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
+
+; --------------------------------------------------------------------------
index bbf8d0e92fdd84ddaa0017a2df039d224a740898..de6df8d129a0d5a79c2f85a788a795f54b5ea030 100644 (file)
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : December 24, 2005
+ * ---------------------------------------------------------------------
+ *
  * This file contains the inverse-DCT management logic.
  * This code selects a particular IDCT implementation to be used,
  * and it performs related housekeeping chores.  No code in this file
@@ -94,6 +101,7 @@ start_pass (j_decompress_ptr cinfo)
   int method = 0;
   inverse_DCT_method_ptr method_ptr = NULL;
   JQUANT_TBL * qtbl;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -105,34 +113,95 @@ start_pass (j_decompress_ptr cinfo)
       method = JDCT_ISLOW;     /* jidctred uses islow-style table */
       break;
     case 2:
-      method_ptr = jpeg_idct_2x2;
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2 &&
+          IS_CONST_ALIGNED_16(jconst_idct_red_sse2))
+       method_ptr = jpeg_idct_2x2_sse2;
+      else
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+      if (simd & JSIMD_MMX)
+       method_ptr = jpeg_idct_2x2_mmx;
+      else
+#endif
+       method_ptr = jpeg_idct_2x2;
       method = JDCT_ISLOW;     /* jidctred uses islow-style table */
       break;
     case 4:
-      method_ptr = jpeg_idct_4x4;
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2 &&
+          IS_CONST_ALIGNED_16(jconst_idct_red_sse2))
+       method_ptr = jpeg_idct_4x4_sse2;
+      else
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+      if (simd & JSIMD_MMX)
+       method_ptr = jpeg_idct_4x4_mmx;
+      else
+#endif
+       method_ptr = jpeg_idct_4x4;
       method = JDCT_ISLOW;     /* jidctred uses islow-style table */
       break;
-#endif
+#endif /* IDCT_SCALING_SUPPORTED */
     case DCTSIZE:
       switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
       case JDCT_ISLOW:
-       method_ptr = jpeg_idct_islow;
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE2 &&
+           IS_CONST_ALIGNED_16(jconst_idct_islow_sse2))
+         method_ptr = jpeg_idct_islow_sse2;
+       else
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+       if (simd & JSIMD_MMX)
+         method_ptr = jpeg_idct_islow_mmx;
+       else
+#endif
+         method_ptr = jpeg_idct_islow;
        method = JDCT_ISLOW;
        break;
-#endif
+#endif /* DCT_ISLOW_SUPPORTED */
 #ifdef DCT_IFAST_SUPPORTED
       case JDCT_IFAST:
-       method_ptr = jpeg_idct_ifast;
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE2 &&
+           IS_CONST_ALIGNED_16(jconst_idct_ifast_sse2))
+         method_ptr = jpeg_idct_ifast_sse2;
+       else
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+       if (simd & JSIMD_MMX)
+         method_ptr = jpeg_idct_ifast_mmx;
+       else
+#endif
+         method_ptr = jpeg_idct_ifast;
        method = JDCT_IFAST;
        break;
-#endif
+#endif /* DCT_IFAST_SUPPORTED */
 #ifdef DCT_FLOAT_SUPPORTED
       case JDCT_FLOAT:
-       method_ptr = jpeg_idct_float;
+#ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE && simd & JSIMD_SSE2 &&
+           IS_CONST_ALIGNED_16(jconst_idct_float_sse2))
+         method_ptr = jpeg_idct_float_sse2;
+       else
+#endif
+#ifdef JIDCT_FLT_SSE_MMX_SUPPORTED
+       if (simd & JSIMD_SSE &&
+           IS_CONST_ALIGNED_16(jconst_idct_float_sse))
+         method_ptr = jpeg_idct_float_sse;
+       else
+#endif
+#ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED
+       if (simd & JSIMD_3DNOW)
+         method_ptr = jpeg_idct_float_3dnow;
+       else
+#endif
+         method_ptr = jpeg_idct_float;
        method = JDCT_FLOAT;
        break;
-#endif
+#endif /* DCT_FLOAT_SUPPORTED */
       default:
        ERREXIT(cinfo, JERR_NOT_COMPILED);
        break;
@@ -267,3 +336,78 @@ jinit_inverse_dct (j_decompress_ptr cinfo)
     idct->cur_method[ci] = -1;
   }
 }
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+/* Report which SIMD flavor of the IDCT would be chosen for the given method. */
+GLOBAL(unsigned int)   /* a JSIMD_* flag; JSIMD_NONE if no SIMD test passes */
+jpeg_simd_inverse_dct (j_decompress_ptr cinfo, int method)
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
+  /* Each case repeats the CPU-flag/alignment tests start_pass() makes. */
+  switch (method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_idct_islow_sse2))
+      return JSIMD_SSE2;
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+    if (simd & JSIMD_MMX)
+      return JSIMD_MMX;
+#endif
+    return JSIMD_NONE;
+#endif /* DCT_ISLOW_SUPPORTED */
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_idct_ifast_sse2))
+      return JSIMD_SSE2;
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+    if (simd & JSIMD_MMX)
+      return JSIMD_MMX;
+#endif
+    return JSIMD_NONE;
+#endif /* DCT_IFAST_SUPPORTED */
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+#ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE && simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_idct_float_sse2))
+      return JSIMD_SSE;                /* (JSIMD_SSE | JSIMD_SSE2); */
+#endif
+#ifdef JIDCT_FLT_SSE_MMX_SUPPORTED
+    if (simd & JSIMD_SSE &&
+        IS_CONST_ALIGNED_16(jconst_idct_float_sse))
+      return JSIMD_SSE;                /* (JSIMD_SSE | JSIMD_MMX); */
+#endif
+#ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED
+    if (simd & JSIMD_3DNOW)
+      return JSIMD_3DNOW;      /* (JSIMD_3DNOW | JSIMD_MMX); */
+#endif
+    return JSIMD_NONE;
+#endif /* DCT_FLOAT_SUPPORTED */
+#ifdef IDCT_SCALING_SUPPORTED
+  case JDCT_FLOAT + 1:         /* tests match start_pass()'s scaled 2x2/4x4 cases */
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_idct_red_sse2))
+      return JSIMD_SSE2;
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+    if (simd & JSIMD_MMX)
+      return JSIMD_MMX;
+#endif
+    return JSIMD_NONE;
+#endif /* IDCT_SCALING_SUPPORTED */
+  default:
+    ;
+  }
+
+  return JSIMD_NONE;   /* not compiled */
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
index b5ba39f736a7ae4f059d4b0594f4cc8ff854e1f4..4f75ebe9ee73f1a269caaa795b3a083043660979 100644 (file)
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified to improve performance.
+ * Last Modified : October 31, 2004
+ * ---------------------------------------------------------------------
+ *
  * This file contains Huffman entropy decoding routines.
  *
  * Much of the complexity here has to do with supporting input suspension.
@@ -151,8 +158,8 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
 {
   JHUFF_TBL *htbl;
   d_derived_tbl *dtbl;
-  int p, i, l, si, numsymbols;
-  int lookbits, ctr;
+  int p, i, l, la, lx, si, numsymbols;
+  int lookbits, look_end, sym, val, ctr;
   char huffsize[257];
   unsigned int huffcode[257];
   unsigned int code;
@@ -234,18 +241,34 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
    * with that code.
    */
 
-  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+  MEMZERO(dtbl->lookx_nbits, SIZEOF(dtbl->lookx_nbits));
 
   p = 0;
-  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+  for (l = 1; l <= HUFFX_LOOKAHEAD-1; l++) {
     for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
       /* l = current code's length, p = its index in huffcode[] & huffval[]. */
       /* Generate left-justified code followed by all possible bit sequences */
-      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
-      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-       dtbl->look_nbits[lookbits] = l;
-       dtbl->look_sym[lookbits] = htbl->huffval[p];
-       lookbits++;
+      sym = htbl->huffval[p];          /* current symbol */
+      la = sym & 15;                   /* length of additional bits field */
+      lx = HUFFX_LOOKAHEAD - l;
+      lookbits = huffcode[p] << lx;
+      look_end = lookbits + (1 << lx);
+      lx -= la;
+      while (lookbits < look_end) {
+       if (lx >= 0) {
+         val = (lookbits >>  lx) & ((1 << la) - 1);
+         ctr = 1 << lx;
+       } else {
+         val = (lookbits << -lx) & ((1 << la) - 1);
+         ctr = 1;
+       }
+       val = HUFF_EXTEND(val, la);
+       for (; ctr > 0; ctr--) {
+         dtbl->lookx_nbits[lookbits] = l + la;
+         dtbl->lookx_val[lookbits] = val;
+         dtbl->lookx_sym[lookbits] = sym;
+         lookbits++;
+       }
       }
     }
   }
@@ -271,23 +294,8 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
  * See jdhuff.h for info about usage.
  * Note: current values of get_buffer and bits_left are passed as parameters,
  * but are returned in the corresponding fields of the state struct.
- *
- * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
- * of get_buffer to be used.  (On machines with wider words, an even larger
- * buffer could be used.)  However, on some machines 32-bit shifts are
- * quite slow and take time proportional to the number of places shifted.
- * (This is true with most PC compilers, for instance.)  In this case it may
- * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
- * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
  */
 
-#ifdef SLOW_SHIFT_32
-#define MIN_GET_BITS  15       /* minimum allowable value */
-#else
-#define MIN_GET_BITS  (BIT_BUF_SIZE-7)
-#endif
-
-
 GLOBAL(boolean)
 jpeg_fill_bit_buffer (bitread_working_state * state,
                      register bit_buf_type get_buffer, register int bits_left,
@@ -433,32 +441,6 @@ jpeg_huff_decode (bitread_working_state * state,
 }
 
 
-/*
- * Figure F.12: extend sign bit.
- * On some machines, a shift and add will be faster than a table lookup.
- */
-
-#ifdef AVOID_TABLES
-
-#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
-
-#else
-
-#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
-
-static const int extend_test[16] =   /* entry n is 2**(n-1) */
-  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
-    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
-
-static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
-  { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
-    ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
-    ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
-    ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
-
-#endif /* AVOID_TABLES */
-
-
 /*
  * Check for a restart marker & resynchronize decoder.
  * Returns FALSE if must suspend.
@@ -548,13 +530,59 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
       /* Decode a single block's worth of coefficients */
 
       /* Section F.2.2.1: decode the DC coefficient difference */
-      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
-      if (s) {
-       CHECK_BIT_BUFFER(br_state, s, return FALSE);
-       r = GET_BITS(s);
-       s = HUFF_EXTEND(r, s);
+      {                /* HUFFX_DECODE */
+       register int nb, look, t;
+       if (bits_left < HUFFX_LOOKAHEAD) {
+         register const JOCTET * next_input_byte = br_state.next_input_byte;
+         register size_t         bytes_in_buffer = br_state.bytes_in_buffer;
+         if (cinfo->unread_marker == 0) {
+           while (bits_left < MIN_GET_BITS) {
+             register int c;
+             if (bytes_in_buffer == 0 ||
+                 (c = GETJOCTET(*next_input_byte)) == 0xFF) {
+               goto label11; }
+             bytes_in_buffer--; next_input_byte++;
+             get_buffer = (get_buffer << 8) | c;
+             bits_left += 8;
+           }
+           br_state.next_input_byte = next_input_byte;
+           br_state.bytes_in_buffer = bytes_in_buffer;
+         } else {
+       label11:
+           br_state.next_input_byte = next_input_byte;
+           br_state.bytes_in_buffer = bytes_in_buffer;
+           if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) {
+             return FALSE; }
+           get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+           if (bits_left < HUFFX_LOOKAHEAD) {
+             nb = 1; goto label1;
+           }
+         }
+       }
+       look = PEEK_BITS(HUFFX_LOOKAHEAD);
+       if ((nb = dctbl->lookx_nbits[look]) != 0) {
+         s = dctbl->lookx_val[look];
+         if (nb <= HUFFX_LOOKAHEAD) {
+           DROP_BITS(nb);
+         } else {
+           DROP_BITS(HUFFX_LOOKAHEAD);
+           nb -= HUFFX_LOOKAHEAD;
+           CHECK_BIT_BUFFER(br_state, nb, return FALSE);
+           s += GET_BITS(nb);
+         }
+       } else {
+         nb = HUFFX_LOOKAHEAD;
+      label1:
+         if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,dctbl,nb))
+              < 0) { return FALSE; }
+         get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+         if (s) {
+           CHECK_BIT_BUFFER(br_state, s, return FALSE);
+           t = GET_BITS(s);
+           s = HUFF_EXTEND(t, s);
+         }
+       }
       }
-
       if (entropy->dc_needed[blkn]) {
        /* Convert DC difference to actual value, update last_dc_val */
        int ci = cinfo->MCU_membership[blkn];
@@ -569,16 +597,65 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
        /* Section F.2.2.2: decode the AC coefficients */
        /* Since zeroes are skipped, output area must be cleared beforehand */
        for (k = 1; k < DCTSIZE2; k++) {
-         HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
-      
-         r = s >> 4;
-         s &= 15;
-      
+         {     /* HUFFX_DECODE */
+           register int nb, look, t;
+           if (bits_left < HUFFX_LOOKAHEAD) {
+             register const JOCTET * next_input_byte
+                                             = br_state.next_input_byte;
+             register size_t bytes_in_buffer = br_state.bytes_in_buffer;
+             if (cinfo->unread_marker == 0) {
+               while (bits_left < MIN_GET_BITS) {
+                 register int c;
+                 if (bytes_in_buffer == 0 ||
+                     (c = GETJOCTET(*next_input_byte)) == 0xFF) {
+                   goto label21; }
+                 bytes_in_buffer--; next_input_byte++;
+                 get_buffer = (get_buffer << 8) | c;
+                 bits_left += 8;
+               }
+               br_state.next_input_byte = next_input_byte;
+               br_state.bytes_in_buffer = bytes_in_buffer;
+             } else {
+           label21:
+               br_state.next_input_byte = next_input_byte;
+               br_state.bytes_in_buffer = bytes_in_buffer;
+               if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left,0)) {
+                 return FALSE; }
+               get_buffer = br_state.get_buffer;
+               bits_left  = br_state.bits_left;
+               if (bits_left < HUFFX_LOOKAHEAD) {
+                 nb = 1; goto label2;
+               }
+             }
+           }
+           look = PEEK_BITS(HUFFX_LOOKAHEAD);
+           if ((nb = actbl->lookx_nbits[look]) != 0) {
+             s = actbl->lookx_val[look];
+             r = actbl->lookx_sym[look] >> 4;
+             if (nb <= HUFFX_LOOKAHEAD) {
+               DROP_BITS(nb);
+             } else {
+               DROP_BITS(HUFFX_LOOKAHEAD);
+               nb -= HUFFX_LOOKAHEAD;
+               CHECK_BIT_BUFFER(br_state, nb, return FALSE);
+               s += GET_BITS(nb);
+             }
+           } else {
+             nb = HUFFX_LOOKAHEAD;
+         label2:
+             if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,actbl,nb))
+                  < 0) { return FALSE; }
+             get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+             r = s >> 4; s &= 15;
+             if (s) {
+               CHECK_BIT_BUFFER(br_state, s, return FALSE);
+               t = GET_BITS(s);
+               s = HUFF_EXTEND(t, s);
+             }
+           }
+         }
          if (s) {
            k += r;
-           CHECK_BIT_BUFFER(br_state, s, return FALSE);
-           r = GET_BITS(s);
-           s = HUFF_EXTEND(r, s);
            /* Output coefficient in natural (dezigzagged) order.
             * Note: the extra entries in jpeg_natural_order[] will save us
             * if k >= DCTSIZE2, which could happen if the data is corrupted.
@@ -596,15 +673,64 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
        /* Section F.2.2.2: decode the AC coefficients */
        /* In this path we just discard the values */
        for (k = 1; k < DCTSIZE2; k++) {
-         HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
-      
-         r = s >> 4;
-         s &= 15;
-      
+         {     /* HUFFX_DECODE */
+           register int nb, look;
+           if (bits_left < HUFFX_LOOKAHEAD) {
+             register const JOCTET * next_input_byte
+                                             = br_state.next_input_byte;
+             register size_t bytes_in_buffer = br_state.bytes_in_buffer;
+             if (cinfo->unread_marker == 0) {
+               while (bits_left < MIN_GET_BITS) {
+                 register int c;
+                 if (bytes_in_buffer == 0 ||
+                     (c = GETJOCTET(*next_input_byte)) == 0xFF) {
+                   goto label31; }
+                 bytes_in_buffer--; next_input_byte++;
+                 get_buffer = (get_buffer << 8) | c;
+                 bits_left += 8;
+               }
+               br_state.next_input_byte = next_input_byte;
+               br_state.bytes_in_buffer = bytes_in_buffer;
+             } else {
+           label31:
+               br_state.next_input_byte = next_input_byte;
+               br_state.bytes_in_buffer = bytes_in_buffer;
+               if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left,0)) {
+                 return FALSE; }
+               get_buffer = br_state.get_buffer;
+               bits_left  = br_state.bits_left;
+               if (bits_left < HUFFX_LOOKAHEAD) {
+                 nb = 1; goto label3;
+               }
+             }
+           }
+           look = PEEK_BITS(HUFFX_LOOKAHEAD);
+           if ((nb = actbl->lookx_nbits[look]) != 0) {
+             s = actbl->lookx_sym[look];
+             r = s >> 4; s &= 15;
+             if (nb <= HUFFX_LOOKAHEAD) {
+               DROP_BITS(nb);
+             } else {
+               DROP_BITS(HUFFX_LOOKAHEAD);
+               nb -= HUFFX_LOOKAHEAD;
+               CHECK_BIT_BUFFER(br_state, nb, return FALSE);
+               DROP_BITS(nb);
+             }
+           } else {
+             nb = HUFFX_LOOKAHEAD;
+         label3:
+             if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,actbl,nb))
+                  < 0) { return FALSE; }
+             get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+             r = s >> 4; s &= 15;
+             if (s) {
+               CHECK_BIT_BUFFER(br_state, s, return FALSE);
+               DROP_BITS(s);
+             }
+           }
+         }
          if (s) {
            k += r;
-           CHECK_BIT_BUFFER(br_state, s, return FALSE);
-           DROP_BITS(s);
          } else {
            if (r != 15)
              break;
index ae19b6cafd7e81f94499d92876a6cf88bcafda86..b5e193ea6b3de72a83d904a845e6165c6746c413 100644 (file)
--- a/jdhuff.h
+++ b/jdhuff.h
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified to improve performance.
+ * Last Modified : October 31, 2004
+ * ---------------------------------------------------------------------
+ *
  * This file contains declarations for Huffman entropy decoding routines
  * that are shared between the sequential decoder (jdhuff.c) and the
  * progressive decoder (jdphuff.c).  No other modules need to see these.
@@ -21,7 +28,7 @@
 
 /* Derived data constructed for each Huffman table */
 
-#define HUFF_LOOKAHEAD 8       /* # of bits of lookahead */
+#define HUFFX_LOOKAHEAD        9       /* # of bits of lookahead */
 
 typedef struct {
   /* Basic tables: (element [0] of each array is unused) */
@@ -36,13 +43,15 @@ typedef struct {
   /* Link to public Huffman table (needed only in jpeg_huff_decode) */
   JHUFF_TBL *pub;
 
-  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+  /* Lookahead tables: indexed by the next HUFFX_LOOKAHEAD bits of
    * the input data stream.  If the next Huffman code is no more
-   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
-   * the corresponding symbol directly from these tables.
+   * than HUFFX_LOOKAHEAD-1 bits long, we can obtain its length,
+   * the corresponding symbol, and the encoded coefficient value
+   * directly from these tables.
    */
-  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
-  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
+  UINT8 lookx_nbits[1<<HUFFX_LOOKAHEAD];  /* # bits, or 0 if too long */
+  INT16 lookx_val[1<<HUFFX_LOOKAHEAD];  /* coefficient value, or unused */
+  UINT8 lookx_sym[1<<HUFFX_LOOKAHEAD];  /* symbol, or unused */
 } d_derived_tbl;
 
 /* Expand a Huffman table definition into the derived format */
@@ -79,6 +88,21 @@ typedef INT32 bit_buf_type;  /* type of bit-extraction buffer */
  * because not all machines measure sizeof in 8-bit bytes.
  */
 
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15       /* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE-7) /* i.e. 25 if BIT_BUF_SIZE is 32 */
+#endif
+
+/* On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
 typedef struct {               /* Bitreading state saved across MCUs */
   bit_buf_type get_buffer;     /* current bit-extraction buffer */
   int bits_left;               /* # of unused bits in it */
@@ -109,7 +133,7 @@ typedef struct {            /* Bitreading working state within an MCU */
        br_state.next_input_byte = cinfop->src->next_input_byte; \
        br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
        get_buffer = permstate.get_buffer; \
-       bits_left = permstate.bits_left;
+       bits_left = permstate.bits_left
 
 #define BITREAD_SAVE_STATE(cinfop,permstate)  \
        cinfop->src->next_input_byte = br_state.next_input_byte; \
@@ -155,47 +179,14 @@ EXTERN(boolean) jpeg_fill_bit_buffer
        JPP((bitread_working_state * state, register bit_buf_type get_buffer,
             register int bits_left, int nbits));
 
-
-/*
- * Code for extracting next Huffman-coded symbol from input bit stream.
- * Again, this is time-critical and we make the main paths be macros.
- *
- * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
- * without looping.  Usually, more than 95% of the Huffman codes will be 8
- * or fewer bits long.  The few overlength codes are handled with a loop,
- * which need not be inline code.
- *
- * Notes about the HUFF_DECODE macro:
- * 1. Near the end of the data segment, we may fail to get enough bits
- *    for a lookahead.  In that case, we do it the hard way.
- * 2. If the lookahead table contains no entry, the next code must be
- *    more than HUFF_LOOKAHEAD bits long.
- * 3. jpeg_huff_decode returns -1 if forced to suspend.
- */
-
-#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
-{ register int nb, look; \
-  if (bits_left < HUFF_LOOKAHEAD) { \
-    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-    if (bits_left < HUFF_LOOKAHEAD) { \
-      nb = 1; goto slowlabel; \
-    } \
-  } \
-  look = PEEK_BITS(HUFF_LOOKAHEAD); \
-  if ((nb = htbl->look_nbits[look]) != 0) { \
-    DROP_BITS(nb); \
-    result = htbl->look_sym[look]; \
-  } else { \
-    nb = HUFF_LOOKAHEAD+1; \
-slowlabel: \
-    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
-       { failaction; } \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-  } \
-}
-
 /* Out-of-line case for Huffman code fetching */
 EXTERN(int) jpeg_huff_decode
        JPP((bitread_working_state * state, register bit_buf_type get_buffer,
             register int bits_left, d_derived_tbl * htbl, int min_bits));
+
+
+/*
+ * Figure F.12: extend sign bit.  s must be >= 1; x is evaluated twice.
+ * Written as (x) - ((1<<(s)) - 1): left-shifting -1 is undefined in C.
+ */
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - ((1<<(s)) - 1) : (x))
index 37444468c2370a71f1317a50ddaf3287e3e04969..f440d4095eeb6c4caca254a20efa4086a4a1009d 100644 (file)
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 5, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file contains code for merged upsampling/color conversion.
  *
  * This file combines functions from jdsample.c and jdcolor.c;
@@ -35,6 +42,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jcolsamp.h"          /* Private declarations */
 
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
@@ -218,6 +226,17 @@ merged_1v_upsample (j_decompress_ptr cinfo,
  */
 
 
+#if RGB_PIXELSIZE == 4
+/* offset of filler byte: 0+1+2+3 = 6, minus the three offsets R/G/B use */
+#define RGB_FILLER  (6 - (RGB_RED) - (RGB_GREEN) - (RGB_BLUE))
+/* byte value stored in the filler slot: 0xFF if RGBX_FILLER_0XFF, else 0 */
+#ifdef RGBX_FILLER_0XFF
+#define RGB_FILLER_BYTE 0xFF
+#else
+#define RGB_FILLER_BYTE 0x00
+#endif
+#endif /* RGB_PIXELSIZE == 4 */
+
 /*
  * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
  */
@@ -258,11 +277,17 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
     outptr[RGB_RED] =   range_limit[y + cred];
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
     outptr += RGB_PIXELSIZE;
     y  = GETJSAMPLE(*inptr0++);
     outptr[RGB_RED] =   range_limit[y + cred];
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
     outptr += RGB_PIXELSIZE;
   }
   /* If image width is odd, do the last output column separately */
@@ -276,6 +301,9 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
     outptr[RGB_RED] =   range_limit[y + cred];
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
   }
 }
 
@@ -322,21 +350,33 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
     outptr0[RGB_RED] =   range_limit[y + cred];
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr0[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
     outptr0 += RGB_PIXELSIZE;
     y  = GETJSAMPLE(*inptr00++);
     outptr0[RGB_RED] =   range_limit[y + cred];
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr0[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
     outptr0 += RGB_PIXELSIZE;
     y  = GETJSAMPLE(*inptr01++);
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr1[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
     outptr1 += RGB_PIXELSIZE;
     y  = GETJSAMPLE(*inptr01++);
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr1[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
     outptr1 += RGB_PIXELSIZE;
   }
   /* If image width is odd, do the last output column separately */
@@ -350,10 +390,16 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
     outptr0[RGB_RED] =   range_limit[y + cred];
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr0[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
     y  = GETJSAMPLE(*inptr01);
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#if RGB_PIXELSIZE == 4
+    outptr1[RGB_FILLER] = RGB_FILLER_BYTE;
+#endif
   }
 }
 
@@ -370,6 +416,7 @@ GLOBAL(void)
 jinit_merged_upsampler (j_decompress_ptr cinfo)
 {
   my_upsample_ptr upsample;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
 
   upsample = (my_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -382,19 +429,73 @@ jinit_merged_upsampler (j_decompress_ptr cinfo)
 
   if (cinfo->max_v_samp_factor == 2) {
     upsample->pub.upsample = merged_2v_upsample;
-    upsample->upmethod = h2v2_merged_upsample;
+#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+#ifdef JDMERGE_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_merged_upsample_sse2)) {
+      upsample->upmethod = jpeg_h2v2_merged_upsample_sse2;
+    } else
+#endif
+#ifdef JDMERGE_MMX_SUPPORTED
+    if (simd & JSIMD_MMX) {
+      upsample->upmethod = jpeg_h2v2_merged_upsample_mmx;
+    } else
+#endif
+#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */
+    {
+      upsample->upmethod = h2v2_merged_upsample;
+      build_ycc_rgb_table(cinfo);
+    }
     /* Allocate a spare row buffer */
     upsample->spare_row = (JSAMPROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                (size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
   } else {
     upsample->pub.upsample = merged_1v_upsample;
-    upsample->upmethod = h2v1_merged_upsample;
+#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+#ifdef JDMERGE_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_merged_upsample_sse2)) {
+      upsample->upmethod = jpeg_h2v1_merged_upsample_sse2;
+    } else
+#endif
+#ifdef JDMERGE_MMX_SUPPORTED
+    if (simd & JSIMD_MMX) {
+      upsample->upmethod = jpeg_h2v1_merged_upsample_mmx;
+    } else
+#endif
+#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */
+    {
+      upsample->upmethod = h2v1_merged_upsample;
+      build_ycc_rgb_table(cinfo);
+    }
     /* No spare row needed */
     upsample->spare_row = NULL;
   }
+}
+
 
-  build_ycc_rgb_table(cinfo);
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+/* Return JSIMD_SSE2, JSIMD_MMX or JSIMD_NONE for the merged upsampler. */
+GLOBAL(unsigned int)
+jpeg_simd_merged_upsampler (j_decompress_ptr cinfo)
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
+
+#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+#ifdef JDMERGE_SSE2_SUPPORTED
+  if (simd & JSIMD_SSE2 &&
+      IS_CONST_ALIGNED_16(jconst_merged_upsample_sse2))
+    return JSIMD_SSE2;         /* same test jinit_merged_upsampler applies */
+#endif
+#ifdef JDMERGE_MMX_SUPPORTED
+  if (simd & JSIMD_MMX)
+    return JSIMD_MMX;          /* same test jinit_merged_upsampler applies */
+#endif
+#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */
+
+  return JSIMD_NONE;           /* no SIMD variant compiled in or usable */
 }
 
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
 #endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/jdmermmx.asm b/jdmermmx.asm
new file mode 100644 (file)
index 0000000..4c88515
--- /dev/null
@@ -0,0 +1,981 @@
+;
+; jdmermmx.asm - merged upsampling/color conversion (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+%ifdef UPSAMPLE_MERGING_SUPPORTED
+%ifdef JDMERGE_MMX_SUPPORTED
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS      16
+
+F_0_344        equ      22554                  ; FIX(0.34414)
+F_0_714        equ      46802                  ; FIX(0.71414)
+F_1_402        equ      91881                  ; FIX(1.40200)
+F_1_772        equ     116130                  ; FIX(1.77200)
+F_0_402        equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285        equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228        equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_merged_upsample_mmx)
+
+EXTN(jconst_merged_upsample_mmx):
+
+PW_F0402       times 4 dw  F_0_402
+PW_MF0228      times 4 dw -F_0_228
+PW_MF0344_F0285        times 2 dw -F_0_344, F_0_285
+PW_ONE         times 4 dw  1
+PD_ONEHALF     times 2 dd  1 << (SCALEBITS-1)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+;
+; GLOBAL(void)
+; jpeg_h2v1_merged_upsample_mmx (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+;                                JDIMENSION in_row_group_ctr,
+;                                JSAMPARRAY output_buf);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)           (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)    (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)          (b)+20          ; JSAMPARRAY output_buf
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         3
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h2v1_merged_upsample_mmx)
+
+EXTN(jpeg_h2v1_merged_upsample_mmx):
+       push    ebp
+       mov     eax,esp                         ; eax = frame base before alignment
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; kept for the final "pop esp"
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]
+       pushpic eax             ; make room for the GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       mov     ecx, POINTER [cinfo(eax)]
+       mov     ecx, JDIMENSION [jdstruct_output_width(ecx)]    ; col
+       test    ecx,ecx
+       jz      near .return
+
+       push    ecx                     ; save col; ecx is reused below
+
+       mov     edi, JSAMPIMAGE [input_buf(eax)]
+       mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+       mov     edi, JSAMPARRAY [output_buf(eax)]
+       mov     esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]         ; inptr0
+       mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+       mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+       mov     edi, JSAMPROW [edi]                             ; outptr
+
+       pop     ecx                     ; col
+
+       alignx  16,7
+.columnloop:
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+       movq      mm6, MMWORD [ebx]     ; mm6=Cb(01234567)
+       movq      mm7, MMWORD [edx]     ; mm7=Cr(01234567)
+
+       pxor      mm1,mm1               ; mm1=(all 0's)
+       pcmpeqw   mm3,mm3
+       psllw     mm3,7                 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80}
+
+       movq      mm4,mm6
+       punpckhbw mm6,mm1               ; mm6=Cb(4567)=CbH
+       punpcklbw mm4,mm1               ; mm4=Cb(0123)=CbL
+       movq      mm0,mm7
+       punpckhbw mm7,mm1               ; mm7=Cr(4567)=CrH
+       punpcklbw mm0,mm1               ; mm0=Cr(0123)=CrL
+
+       paddw     mm6,mm3
+       paddw     mm4,mm3
+       paddw     mm7,mm3
+       paddw     mm0,mm3
+
+       ; (Original)
+       ; R = Y                + 1.40200 * Cr
+       ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+       ; B = Y + 1.77200 * Cb
+       ;
+       ; (This implementation)
+       ; R = Y                + 0.40200 * Cr + Cr
+       ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+       ; B = Y - 0.22800 * Cb + Cb + Cb
+
+       movq    mm5,mm6                 ; mm5=CbH
+       movq    mm2,mm4                 ; mm2=CbL
+       paddw   mm6,mm6                 ; mm6=2*CbH
+       paddw   mm4,mm4                 ; mm4=2*CbL
+       movq    mm1,mm7                 ; mm1=CrH
+       movq    mm3,mm0                 ; mm3=CrL
+       paddw   mm7,mm7                 ; mm7=2*CrH
+       paddw   mm0,mm0                 ; mm0=2*CrL
+
+       pmulhw  mm6,[GOTOFF(eax,PW_MF0228)]     ; mm6=(2*CbH * -FIX(0.22800))
+       pmulhw  mm4,[GOTOFF(eax,PW_MF0228)]     ; mm4=(2*CbL * -FIX(0.22800))
+       pmulhw  mm7,[GOTOFF(eax,PW_F0402)]      ; mm7=(2*CrH * FIX(0.40200))
+       pmulhw  mm0,[GOTOFF(eax,PW_F0402)]      ; mm0=(2*CrL * FIX(0.40200))
+
+       paddw   mm6,[GOTOFF(eax,PW_ONE)]
+       paddw   mm4,[GOTOFF(eax,PW_ONE)]
+       psraw   mm6,1                   ; mm6=(CbH * -FIX(0.22800))
+       psraw   mm4,1                   ; mm4=(CbL * -FIX(0.22800))
+       paddw   mm7,[GOTOFF(eax,PW_ONE)]
+       paddw   mm0,[GOTOFF(eax,PW_ONE)]
+       psraw   mm7,1                   ; mm7=(CrH * FIX(0.40200))
+       psraw   mm0,1                   ; mm0=(CrL * FIX(0.40200))
+
+       paddw   mm6,mm5
+       paddw   mm4,mm2
+       paddw   mm6,mm5                 ; mm6=(CbH * FIX(1.77200))=(B-Y)H
+       paddw   mm4,mm2                 ; mm4=(CbL * FIX(1.77200))=(B-Y)L
+       paddw   mm7,mm1                 ; mm7=(CrH * FIX(1.40200))=(R-Y)H
+       paddw   mm0,mm3                 ; mm0=(CrL * FIX(1.40200))=(R-Y)L
+
+       movq    MMWORD [wk(0)], mm6     ; wk(0)=(B-Y)H
+       movq    MMWORD [wk(1)], mm7     ; wk(1)=(R-Y)H
+
+       movq      mm6,mm5
+       movq      mm7,mm2
+       punpcklwd mm5,mm1
+       punpckhwd mm6,mm1
+       pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   mm6,[GOTOFF(eax,PW_MF0344_F0285)]
+       punpcklwd mm2,mm3
+       punpckhwd mm7,mm3
+       pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   mm7,[GOTOFF(eax,PW_MF0344_F0285)]
+
+       paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     mm6,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     mm5,SCALEBITS
+       psrad     mm6,SCALEBITS
+       paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     mm7,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     mm2,SCALEBITS
+       psrad     mm7,SCALEBITS
+
+       packssdw  mm5,mm6       ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+       packssdw  mm2,mm7       ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+       psubw     mm5,mm1       ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+       psubw     mm2,mm3       ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+       movq    MMWORD [wk(2)], mm5     ; wk(2)=(G-Y)H
+
+       mov     al,2                    ; Yctr: pass 1 = L halves, pass 2 = H halves
+       jmp     short .Yloop_1st
+       alignx  16,7
+
+.Yloop_2nd:
+       movq    mm0, MMWORD [wk(1)]     ; mm0=(R-Y)H
+       movq    mm2, MMWORD [wk(2)]     ; mm2=(G-Y)H
+       movq    mm4, MMWORD [wk(0)]     ; mm4=(B-Y)H
+       alignx  16,7
+
+.Yloop_1st:
+       movq    mm7, MMWORD [esi]       ; mm7=Y(01234567)
+
+       pcmpeqw mm6,mm6
+       psrlw   mm6,BYTE_BIT            ; mm6={0xFF 0x00 0xFF 0x00 ..}
+       pand    mm6,mm7                 ; mm6=Y(0246)=YE
+       psrlw   mm7,BYTE_BIT            ; mm7=Y(1357)=YO
+
+       movq    mm1,mm0                 ; mm1=mm0=(R-Y)(L/H)
+       movq    mm3,mm2                 ; mm3=mm2=(G-Y)(L/H)
+       movq    mm5,mm4                 ; mm5=mm4=(B-Y)(L/H)
+
+       paddw     mm0,mm6               ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6)
+       paddw     mm1,mm7               ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7)
+       packuswb  mm0,mm0               ; mm0=(R0 R2 R4 R6 ** ** ** **)
+       packuswb  mm1,mm1               ; mm1=(R1 R3 R5 R7 ** ** ** **)
+
+       paddw     mm2,mm6               ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6)
+       paddw     mm3,mm7               ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7)
+       packuswb  mm2,mm2               ; mm2=(G0 G2 G4 G6 ** ** ** **)
+       packuswb  mm3,mm3               ; mm3=(G1 G3 G5 G7 ** ** ** **)
+
+       paddw     mm4,mm6               ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6)
+       paddw     mm5,mm7               ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7)
+       packuswb  mm4,mm4               ; mm4=(B0 B2 B4 B6 ** ** ** **)
+       packuswb  mm5,mm5               ; mm5=(B1 B3 B5 B7 ** ** ** **)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+       ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+       ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+       ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+       ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+
+       punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+       punpcklbw mmE,mmB               ; mmE=(20 01 22 03 24 05 26 07)
+       punpcklbw mmD,mmF               ; mmD=(11 21 13 23 15 25 17 27)
+
+       movq      mmG,mmA
+       movq      mmH,mmA
+       punpcklwd mmA,mmE               ; mmA=(00 10 20 01 02 12 22 03)
+       punpckhwd mmG,mmE               ; mmG=(04 14 24 05 06 16 26 07)
+
+       psrlq     mmH,2*BYTE_BIT        ; mmH=(02 12 04 14 06 16 -- --)
+       psrlq     mmE,2*BYTE_BIT        ; mmE=(22 03 24 05 26 07 -- --)
+
+       movq      mmC,mmD
+       movq      mmB,mmD
+       punpcklwd mmD,mmH               ; mmD=(11 21 02 12 13 23 04 14)
+       punpckhwd mmC,mmH               ; mmC=(15 25 06 16 17 27 -- --)
+
+       psrlq     mmB,2*BYTE_BIT        ; mmB=(13 23 15 25 17 27 -- --)
+
+       movq      mmF,mmE
+       punpcklwd mmE,mmB               ; mmE=(22 03 13 23 24 05 15 25)
+       punpckhwd mmF,mmB               ; mmF=(26 07 17 27 -- -- -- --)
+
+       punpckldq mmA,mmD               ; mmA=(00 10 20 01 11 21 02 12)
+       punpckldq mmE,mmG               ; mmE=(22 03 13 23 04 14 24 05)
+       punpckldq mmC,mmF               ; mmC=(15 25 06 16 26 07 17 27)
+
+       cmp     ecx, byte SIZEOF_MMWORD ; fewer columns left than one pass emits?
+       jb      short .column_st16
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+
+       sub     ecx, byte SIZEOF_MMWORD
+       jz      short .endcolumn
+
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+       add     esi, byte SIZEOF_MMWORD                 ; inptr0
+       dec     al                      ; Yctr
+       jnz     near .Yloop_2nd
+
+       add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+       add     edx, byte SIZEOF_MMWORD                 ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st16:
+       lea     ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
+       cmp     ecx, byte 2*SIZEOF_MMWORD
+       jb      short .column_st8
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+       movq    mmA,mmC
+       sub     ecx, byte 2*SIZEOF_MMWORD
+       add     edi, byte 2*SIZEOF_MMWORD
+       jmp     short .column_st4
+.column_st8:
+       cmp     ecx, byte SIZEOF_MMWORD
+       jb      short .column_st4
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    mmA,mmE
+       sub     ecx, byte SIZEOF_MMWORD
+       add     edi, byte SIZEOF_MMWORD
+.column_st4:
+       movd    eax,mmA
+       cmp     ecx, byte SIZEOF_DWORD
+       jb      short .column_st2
+       mov     DWORD [edi+0*SIZEOF_DWORD], eax
+       psrlq   mmA,DWORD_BIT
+       movd    eax,mmA
+       sub     ecx, byte SIZEOF_DWORD
+       add     edi, byte SIZEOF_DWORD
+.column_st2:
+       cmp     ecx, byte SIZEOF_WORD
+       jb      short .column_st1
+       mov     WORD [edi+0*SIZEOF_WORD], ax
+       shr     eax,WORD_BIT
+       sub     ecx, byte SIZEOF_WORD
+       add     edi, byte SIZEOF_WORD
+.column_st1:
+       cmp     ecx, byte SIZEOF_BYTE
+       jb      short .endcolumn
+       mov     BYTE [edi+0*SIZEOF_BYTE], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+       pcmpeqb   mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+       pcmpeqb   mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%else
+       pxor      mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+       pxor      mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%endif
+       ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+       ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+       ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+       ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+
+       punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+       punpcklbw mmE,mmG               ; mmE=(20 30 22 32 24 34 26 36)
+       punpcklbw mmB,mmD               ; mmB=(01 11 03 13 05 15 07 17)
+       punpcklbw mmF,mmH               ; mmF=(21 31 23 33 25 35 27 37)
+
+       movq      mmC,mmA
+       punpcklwd mmA,mmE               ; mmA=(00 10 20 30 02 12 22 32)
+       punpckhwd mmC,mmE               ; mmC=(04 14 24 34 06 16 26 36)
+       movq      mmG,mmB
+       punpcklwd mmB,mmF               ; mmB=(01 11 21 31 03 13 23 33)
+       punpckhwd mmG,mmF               ; mmG=(05 15 25 35 07 17 27 37)
+
+       movq      mmD,mmA
+       punpckldq mmA,mmB               ; mmA=(00 10 20 30 01 11 21 31)
+       punpckhdq mmD,mmB               ; mmD=(02 12 22 32 03 13 23 33)
+       movq      mmH,mmC
+       punpckldq mmC,mmG               ; mmC=(04 14 24 34 05 15 25 35)
+       punpckhdq mmH,mmG               ; mmH=(06 16 26 36 07 17 27 37)
+
+       cmp     ecx, byte SIZEOF_MMWORD ; fewer columns left than one pass emits?
+       jb      short .column_st16
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+       movq    MMWORD [edi+3*SIZEOF_MMWORD], mmH
+
+       sub     ecx, byte SIZEOF_MMWORD
+       jz      short .endcolumn
+
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+       add     esi, byte SIZEOF_MMWORD                 ; inptr0
+       dec     al                      ; Yctr
+       jnz     near .Yloop_2nd
+
+       add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+       add     edx, byte SIZEOF_MMWORD                 ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st16:
+       cmp     ecx, byte SIZEOF_MMWORD/2       ; ecx = columns left; 2 pixels per MMWORD
+       jb      short .column_st8
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+       movq    mmA,mmC
+       movq    mmD,mmH
+       sub     ecx, byte SIZEOF_MMWORD/2
+       add     edi, byte 2*SIZEOF_MMWORD
+.column_st8:
+       cmp     ecx, byte SIZEOF_MMWORD/4
+       jb      short .column_st4
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    mmA,mmD
+       sub     ecx, byte SIZEOF_MMWORD/4
+       add     edi, byte 1*SIZEOF_MMWORD
+.column_st4:
+       cmp     ecx, byte SIZEOF_MMWORD/8
+       jb      short .endcolumn
+       movd    DWORD [edi+0*SIZEOF_DWORD], mmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%ifndef USE_DEDICATED_H2V2_MERGED_UPSAMPLE_MMX
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jpeg_h2v2_merged_upsample_mmx (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+;                                JDIMENSION in_row_group_ctr,
+;                                JSAMPARRAY output_buf);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)           (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)    (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)          (b)+20          ; JSAMPARRAY output_buf
+
+       align   16
+       global  EXTN(jpeg_h2v2_merged_upsample_mmx)
+
+EXTN(jpeg_h2v2_merged_upsample_mmx):
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     eax, POINTER [cinfo(ebp)]
+
+       mov     edi, JSAMPIMAGE [input_buf(ebp)]
+       mov     ecx, JDIMENSION [in_row_group_ctr(ebp)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+       mov     edi, JSAMPARRAY [output_buf(ebp)]
+       lea     esi, [esi+ecx*SIZEOF_JSAMPROW]  ; bias by ctr rows; h2v1 adds ctr again
+
+       push    edx                     ; inptr2
+       push    ebx                     ; inptr1
+       push    esi                     ; inptr00
+       mov     ebx,esp                 ; ebx -> 3-entry array just pushed
+
+       push    edi                     ; output_buf (outptr0)
+       push    ecx                     ; in_row_group_ctr
+       push    ebx                     ; input_buf
+       push    eax                     ; cinfo
+
+       call    near EXTN(jpeg_h2v1_merged_upsample_mmx)        ; first output row
+
+       add     esi, byte SIZEOF_JSAMPROW       ; inptr01
+       add     edi, byte SIZEOF_JSAMPROW       ; outptr1
+       mov     POINTER [ebx+0*SIZEOF_POINTER], esi     ; patch array slot 0
+       mov     POINTER [ebx-1*SIZEOF_POINTER], edi     ; patch pushed output_buf
+
+       call    near EXTN(jpeg_h2v1_merged_upsample_mmx)        ; second output row
+
+       add     esp, byte 7*SIZEOF_DWORD        ; drop 4 args + 3-entry array
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%else  ; USE_DEDICATED_H2V2_MERGED_UPSAMPLE_MMX
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jpeg_h2v2_merged_upsample_mmx (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+;                                JDIMENSION in_row_group_ctr,
+;                                JSAMPARRAY output_buf);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)           (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)    (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)          (b)+20          ; JSAMPARRAY output_buf
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         10
+%define inptr1         wk(0)-SIZEOF_JSAMPROW   ; JSAMPROW inptr1
+%define inptr2         inptr1-SIZEOF_JSAMPROW  ; JSAMPROW inptr2
+%define gotptr         inptr2-SIZEOF_POINTER   ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h2v2_merged_upsample_mmx)
+
+EXTN(jpeg_h2v2_merged_upsample_mmx):
+       push    ebp
+       mov     eax,esp                         ; eax = esp, pointing at the saved caller ebp
+       sub     esp, byte 4                     ; leave room for the saved-esp slot
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; [aligned esp] = pre-alignment esp
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [inptr2]                   ; reserve wk[WK_NUM], inptr1 and inptr2
+       pushpic eax             ; make room for the GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       mov     ecx, POINTER [cinfo(eax)]       ; arguments are addressed through eax (unaligned frame)
+       mov     ecx, JDIMENSION [jdstruct_output_width(ecx)]    ; col
+       test    ecx,ecx
+       jz      near .return
+
+       push    ecx                     ; save col; ecx is reused as the row-group index below
+
+       mov     edi, JSAMPIMAGE [input_buf(eax)]
+       mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+       mov     edi, JSAMPARRAY [output_buf(eax)]
+       mov     eax, JSAMPROW [esi+(ecx*2+0)*SIZEOF_JSAMPROW]   ; inptr00
+       mov     esi, JSAMPROW [esi+(ecx*2+1)*SIZEOF_JSAMPROW]   ; inptr01
+       mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+       mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+
+       pop     ecx             ; col
+       push    eax             ; inptr00
+       push    esi             ; inptr01
+
+       mov     esi, JSAMPROW [edi+0*SIZEOF_JSAMPROW]           ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]           ; outptr1
+       alignx  16,7
+.columnloop:
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+       movq    mm6, MMWORD [ebx]       ; mm6=Cb(01234567)
+       movq    mm7, MMWORD [edx]       ; mm7=Cr(01234567)
+
+       mov     JSAMPROW [inptr1], ebx  ; inptr1
+       mov     JSAMPROW [inptr2], edx  ; inptr2
+       pop     edx                     ; edx=inptr01
+       pop     ebx                     ; ebx=inptr00
+
+       pxor      mm1,mm1               ; mm1=(all 0's)
+       pcmpeqw   mm3,mm3
+       psllw     mm3,7                 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80}
+
+       movq      mm4,mm6
+       punpckhbw mm6,mm1               ; mm6=Cb(4567)=CbH
+       punpcklbw mm4,mm1               ; mm4=Cb(0123)=CbL
+       movq      mm0,mm7
+       punpckhbw mm7,mm1               ; mm7=Cr(4567)=CrH
+       punpcklbw mm0,mm1               ; mm0=Cr(0123)=CrL
+
+       paddw     mm6,mm3               ; adding 0xFF80 subtracts 128 from each word
+       paddw     mm4,mm3
+       paddw     mm7,mm3
+       paddw     mm0,mm3
+
+       ; (Original)
+       ; R = Y                + 1.40200 * Cr
+       ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+       ; B = Y + 1.77200 * Cb
+       ;
+       ; (This implementation)
+       ; R = Y                + 0.40200 * Cr + Cr
+       ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+       ; B = Y - 0.22800 * Cb + Cb + Cb
+
+       movq    mm5,mm6                 ; mm5=CbH
+       movq    mm2,mm4                 ; mm2=CbL
+       paddw   mm6,mm6                 ; mm6=2*CbH
+       paddw   mm4,mm4                 ; mm4=2*CbL
+       movq    mm1,mm7                 ; mm1=CrH
+       movq    mm3,mm0                 ; mm3=CrL
+       paddw   mm7,mm7                 ; mm7=2*CrH
+       paddw   mm0,mm0                 ; mm0=2*CrL
+
+       pmulhw  mm6,[GOTOFF(eax,PW_MF0228)]     ; mm6=(2*CbH * -FIX(0.22800))
+       pmulhw  mm4,[GOTOFF(eax,PW_MF0228)]     ; mm4=(2*CbL * -FIX(0.22800))
+       pmulhw  mm7,[GOTOFF(eax,PW_F0402)]      ; mm7=(2*CrH * FIX(0.40200))
+       pmulhw  mm0,[GOTOFF(eax,PW_F0402)]      ; mm0=(2*CrL * FIX(0.40200))
+
+       paddw   mm6,[GOTOFF(eax,PW_ONE)]        ; +1 before the >>1 below rounds to nearest
+       paddw   mm4,[GOTOFF(eax,PW_ONE)]
+       psraw   mm6,1                   ; mm6=(CbH * -FIX(0.22800))
+       psraw   mm4,1                   ; mm4=(CbL * -FIX(0.22800))
+       paddw   mm7,[GOTOFF(eax,PW_ONE)]
+       paddw   mm0,[GOTOFF(eax,PW_ONE)]
+       psraw   mm7,1                   ; mm7=(CrH * FIX(0.40200))
+       psraw   mm0,1                   ; mm0=(CrL * FIX(0.40200))
+
+       paddw   mm6,mm5
+       paddw   mm4,mm2
+       paddw   mm6,mm5                 ; mm6=(CbH * FIX(1.77200))=(B-Y)H
+       paddw   mm4,mm2                 ; mm4=(CbL * FIX(1.77200))=(B-Y)L
+       paddw   mm7,mm1                 ; mm7=(CrH * FIX(1.40200))=(R-Y)H
+       paddw   mm0,mm3                 ; mm0=(CrL * FIX(1.40200))=(R-Y)L
+
+       movq    MMWORD [wk(0)], mm6     ; wk(0)=(B-Y)H
+       movq    MMWORD [wk(1)], mm7     ; wk(1)=(R-Y)H
+
+       movq      mm6,mm5
+       movq      mm7,mm2
+       punpcklwd mm5,mm1               ; interleave Cb,Cr words so pmaddwd forms Cb*c0+Cr*c1
+       punpckhwd mm6,mm1
+       pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   mm6,[GOTOFF(eax,PW_MF0344_F0285)]
+       punpcklwd mm2,mm3
+       punpckhwd mm7,mm3
+       pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   mm7,[GOTOFF(eax,PW_MF0344_F0285)]
+
+       paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]  ; rounding term for the >>SCALEBITS below
+       paddd     mm6,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     mm5,SCALEBITS
+       psrad     mm6,SCALEBITS
+       paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     mm7,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     mm2,SCALEBITS
+       psrad     mm7,SCALEBITS
+
+       packssdw  mm5,mm6       ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+       packssdw  mm2,mm7       ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+       psubw     mm5,mm1       ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+       psubw     mm2,mm3       ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+       movq    MMWORD [wk(2)], mm5     ; wk(2)=(G-Y)H
+
+       mov     ah,2                    ; YHctr: pass 1 uses the chroma L half, pass 2 the H half
+       jmp     short .YHloop_1st
+       alignx  16,7
+
+.YHloop_2nd:
+       movq    mm0, MMWORD [wk(1)]     ; mm0=(R-Y)H
+       movq    mm2, MMWORD [wk(2)]     ; mm2=(G-Y)H
+       movq    mm4, MMWORD [wk(0)]     ; mm4=(B-Y)H
+       alignx  16,7
+
+.YHloop_1st:
+       movq    MMWORD [wk(3)], mm0     ; wk(3)=(R-Y)(L/H)
+       movq    MMWORD [wk(4)], mm2     ; wk(4)=(G-Y)(L/H)
+       movq    MMWORD [wk(5)], mm4     ; wk(5)=(B-Y)(L/H)
+
+       movq    mm7, MMWORD [ebx]       ; mm7=Y(01234567)
+
+       mov     al,2                    ; YVctr: pass 1 uses Y row inptr00, pass 2 uses inptr01
+       jmp     short .YVloop_1st
+       alignx  16,7
+
+.YVloop_2nd:
+       movq    mm0, MMWORD [wk(3)]     ; mm0=(R-Y)(L/H)
+       movq    mm2, MMWORD [wk(4)]     ; mm2=(G-Y)(L/H)
+       movq    mm4, MMWORD [wk(5)]     ; mm4=(B-Y)(L/H)
+
+       movq    mm7, MMWORD [edx]       ; mm7=Y(01234567)
+       alignx  16,7
+
+.YVloop_1st:
+       pcmpeqw mm6,mm6
+       psrlw   mm6,BYTE_BIT            ; mm6={0xFF 0x00 0xFF 0x00 ..}
+       pand    mm6,mm7                 ; mm6=Y(0246)=YE
+       psrlw   mm7,BYTE_BIT            ; mm7=Y(1357)=YO
+
+       movq    mm1,mm0                 ; mm1=mm0=(R-Y)(L/H)
+       movq    mm3,mm2                 ; mm3=mm2=(G-Y)(L/H)
+       movq    mm5,mm4                 ; mm5=mm4=(B-Y)(L/H)
+
+       paddw     mm0,mm6               ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6)
+       paddw     mm1,mm7               ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7)
+       packuswb  mm0,mm0               ; mm0=(R0 R2 R4 R6 ** ** ** **)
+       packuswb  mm1,mm1               ; mm1=(R1 R3 R5 R7 ** ** ** **)
+
+       paddw     mm2,mm6               ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6)
+       paddw     mm3,mm7               ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7)
+       packuswb  mm2,mm2               ; mm2=(G0 G2 G4 G6 ** ** ** **)
+       packuswb  mm3,mm3               ; mm3=(G1 G3 G5 G7 ** ** ** **)
+
+       paddw     mm4,mm6               ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6)
+       paddw     mm5,mm7               ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7)
+       packuswb  mm4,mm4               ; mm4=(B0 B2 B4 B6 ** ** ** **)
+       packuswb  mm5,mm5               ; mm5=(B1 B3 B5 B7 ** ** ** **)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+       ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+       ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+       ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+       ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+
+       punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+       punpcklbw mmE,mmB               ; mmE=(20 01 22 03 24 05 26 07)
+       punpcklbw mmD,mmF               ; mmD=(11 21 13 23 15 25 17 27)
+
+       movq      mmG,mmA
+       movq      mmH,mmA
+       punpcklwd mmA,mmE               ; mmA=(00 10 20 01 02 12 22 03)
+       punpckhwd mmG,mmE               ; mmG=(04 14 24 05 06 16 26 07)
+
+       psrlq     mmH,2*BYTE_BIT        ; mmH=(02 12 04 14 06 16 -- --)
+       psrlq     mmE,2*BYTE_BIT        ; mmE=(22 03 24 05 26 07 -- --)
+
+       movq      mmC,mmD
+       movq      mmB,mmD
+       punpcklwd mmD,mmH               ; mmD=(11 21 02 12 13 23 04 14)
+       punpckhwd mmC,mmH               ; mmC=(15 25 06 16 17 27 -- --)
+
+       psrlq     mmB,2*BYTE_BIT        ; mmB=(13 23 15 25 17 27 -- --)
+
+       movq      mmF,mmE
+       punpcklwd mmE,mmB               ; mmE=(22 03 13 23 24 05 15 25)
+       punpckhwd mmF,mmB               ; mmF=(26 07 17 27 -- -- -- --)
+
+       punpckldq mmA,mmD               ; mmA=(00 10 20 01 11 21 02 12)
+       punpckldq mmE,mmG               ; mmE=(22 03 13 23 04 14 24 05)
+       punpckldq mmC,mmF               ; mmC=(15 25 06 16 26 07 17 27)
+
+       dec     al                      ; YVctr
+       jz      short .YVloop_break
+
+       movq    MMWORD [wk(6)], mmA     ; stash the inptr00-row pixels while the inptr01 row is computed
+       movq    MMWORD [wk(7)], mmE
+       movq    MMWORD [wk(8)], mmC
+
+       jmp     near .YVloop_2nd
+       alignx  16,7
+
+.YVloop_break:
+       movq    mmH, MMWORD [wk(6)]     ; reload the inptr00-row pixels (stored through esi=outptr0)
+       movq    mmB, MMWORD [wk(7)]
+       movq    mmD, MMWORD [wk(8)]
+
+       cmp     ecx, byte SIZEOF_MMWORD ; fewer than SIZEOF_MMWORD columns left?
+       jb      short .column_st16
+
+       movq    MMWORD [esi+0*SIZEOF_MMWORD], mmH
+       movq    MMWORD [esi+1*SIZEOF_MMWORD], mmB
+       movq    MMWORD [esi+2*SIZEOF_MMWORD], mmD
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+
+       sub     ecx, byte SIZEOF_MMWORD
+       jz      near .endcolumn
+
+       add     esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr0
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr1
+       add     ebx, byte SIZEOF_MMWORD                 ; inptr00
+       add     edx, byte SIZEOF_MMWORD                 ; inptr01
+       dec     ah                      ; YHctr
+       jnz     near .YHloop_2nd
+
+       push    ebx                     ; inptr00
+       push    edx                     ; inptr01
+       mov     ebx, JSAMPROW [inptr1]  ; ebx=inptr1
+       mov     edx, JSAMPROW [inptr2]  ; edx=inptr2
+       add     ebx, byte SIZEOF_MMWORD ; inptr1
+       add     edx, byte SIZEOF_MMWORD ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st16:
+       lea     ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
+       cmp     ecx, byte 2*SIZEOF_MMWORD
+       jb      short .column_st8
+       movq    MMWORD [esi+0*SIZEOF_MMWORD], mmH
+       movq    MMWORD [esi+1*SIZEOF_MMWORD], mmB
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+       movq    mmH,mmD                 ; remaining bytes for outptr0
+       movq    mmA,mmC                 ; remaining bytes for outptr1
+       sub     ecx, byte 2*SIZEOF_MMWORD
+       add     esi, byte 2*SIZEOF_MMWORD
+       add     edi, byte 2*SIZEOF_MMWORD
+       jmp     short .column_st4
+.column_st8:
+       cmp     ecx, byte SIZEOF_MMWORD
+       jb      short .column_st4
+       movq    MMWORD [esi+0*SIZEOF_MMWORD], mmH
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    mmH,mmB
+       movq    mmA,mmE
+       sub     ecx, byte SIZEOF_MMWORD
+       add     esi, byte SIZEOF_MMWORD
+       add     edi, byte SIZEOF_MMWORD
+.column_st4:
+       movd    eax,mmH                 ; eax/edx are free here: no path from the tail re-enters the loops
+       movd    edx,mmA
+       cmp     ecx, byte SIZEOF_DWORD
+       jb      short .column_st2
+       mov     DWORD [esi+0*SIZEOF_DWORD], eax
+       mov     DWORD [edi+0*SIZEOF_DWORD], edx
+       psrlq   mmH,DWORD_BIT
+       psrlq   mmA,DWORD_BIT
+       movd    eax,mmH
+       movd    edx,mmA
+       sub     ecx, byte SIZEOF_DWORD
+       add     esi, byte SIZEOF_DWORD
+       add     edi, byte SIZEOF_DWORD
+.column_st2:
+       cmp     ecx, byte SIZEOF_WORD
+       jb      short .column_st1
+       mov     WORD [esi+0*SIZEOF_WORD], ax
+       mov     WORD [edi+0*SIZEOF_WORD], dx
+       shr     eax,WORD_BIT
+       shr     edx,WORD_BIT
+       sub     ecx, byte SIZEOF_WORD
+       add     esi, byte SIZEOF_WORD
+       add     edi, byte SIZEOF_WORD
+.column_st1:
+       cmp     ecx, byte SIZEOF_BYTE
+       jb      short .endcolumn
+       mov     BYTE [esi+0*SIZEOF_BYTE], al
+       mov     BYTE [edi+0*SIZEOF_BYTE], dl
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+       pcmpeqb   mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+       pcmpeqb   mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%else
+       pxor      mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+       pxor      mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%endif
+       ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+       ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+       ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+       ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+
+       punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+       punpcklbw mmE,mmG               ; mmE=(20 30 22 32 24 34 26 36)
+       punpcklbw mmB,mmD               ; mmB=(01 11 03 13 05 15 07 17)
+       punpcklbw mmF,mmH               ; mmF=(21 31 23 33 25 35 27 37)
+
+       movq      mmC,mmA
+       punpcklwd mmA,mmE               ; mmA=(00 10 20 30 02 12 22 32)
+       punpckhwd mmC,mmE               ; mmC=(04 14 24 34 06 16 26 36)
+       movq      mmG,mmB
+       punpcklwd mmB,mmF               ; mmB=(01 11 21 31 03 13 23 33)
+       punpckhwd mmG,mmF               ; mmG=(05 15 25 35 07 17 27 37)
+
+       movq      mmD,mmA
+       punpckldq mmA,mmB               ; mmA=(00 10 20 30 01 11 21 31)
+       punpckhdq mmD,mmB               ; mmD=(02 12 22 32 03 13 23 33)
+       movq      mmH,mmC
+       punpckldq mmC,mmG               ; mmC=(04 14 24 34 05 15 25 35)
+       punpckhdq mmH,mmG               ; mmH=(06 16 26 36 07 17 27 37)
+
+       dec     al                      ; YVctr
+       jz      short .YVloop_break
+
+       movq    MMWORD [wk(6)], mmA     ; stash the inptr00-row pixels while the inptr01 row is computed
+       movq    MMWORD [wk(7)], mmD
+       movq    MMWORD [wk(8)], mmC
+       movq    MMWORD [wk(9)], mmH
+
+       jmp     near .YVloop_2nd
+       alignx  16,7
+
+.YVloop_break:
+       movq    mmE, MMWORD [wk(6)]     ; reload the inptr00-row pixels (stored through esi=outptr0)
+       movq    mmF, MMWORD [wk(7)]
+       movq    mmB, MMWORD [wk(8)]
+       movq    mmG, MMWORD [wk(9)]
+
+       cmp     ecx, byte SIZEOF_MMWORD ; fewer than SIZEOF_MMWORD columns left?
+       jb      short .column_st16
+
+       movq    MMWORD [esi+0*SIZEOF_MMWORD], mmE
+       movq    MMWORD [esi+1*SIZEOF_MMWORD], mmF
+       movq    MMWORD [esi+2*SIZEOF_MMWORD], mmB
+       movq    MMWORD [esi+3*SIZEOF_MMWORD], mmG
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+       movq    MMWORD [edi+3*SIZEOF_MMWORD], mmH
+
+       sub     ecx, byte SIZEOF_MMWORD
+       jz      short .endcolumn
+
+       add     esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr0
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr1
+       add     ebx, byte SIZEOF_MMWORD                 ; inptr00
+       add     edx, byte SIZEOF_MMWORD                 ; inptr01
+       dec     ah                      ; YHctr
+       jnz     near .YHloop_2nd
+
+       push    ebx                     ; inptr00
+       push    edx                     ; inptr01
+       mov     ebx, JSAMPROW [inptr1]  ; ebx=inptr1
+       mov     edx, JSAMPROW [inptr2]  ; edx=inptr2
+       add     ebx, byte SIZEOF_MMWORD ; inptr1
+       add     edx, byte SIZEOF_MMWORD ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st16:
+       cmp     ecx, byte SIZEOF_MMWORD/2       ; ecx counts columns here (not bytes)
+       jb      short .column_st8
+       movq    MMWORD [esi+0*SIZEOF_MMWORD], mmE
+       movq    MMWORD [esi+1*SIZEOF_MMWORD], mmF
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+       movq    mmE,mmB
+       movq    mmF,mmG
+       movq    mmA,mmC
+       movq    mmD,mmH
+       sub     ecx, byte SIZEOF_MMWORD/2
+       add     esi, byte 2*SIZEOF_MMWORD
+       add     edi, byte 2*SIZEOF_MMWORD
+.column_st8:
+       cmp     ecx, byte SIZEOF_MMWORD/4
+       jb      short .column_st4
+       movq    MMWORD [esi+0*SIZEOF_MMWORD], mmE
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+       movq    mmE,mmF
+       movq    mmA,mmD
+       sub     ecx, byte SIZEOF_MMWORD/4
+       add     esi, byte 1*SIZEOF_MMWORD
+       add     edi, byte 1*SIZEOF_MMWORD
+.column_st4:
+       cmp     ecx, byte SIZEOF_MMWORD/8
+       jb      short .endcolumn
+       movd    DWORD [esi+0*SIZEOF_DWORD], mmE
+       movd    DWORD [edi+0*SIZEOF_DWORD], mmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- pre-alignment esp saved at [aligned ebp] in the prologue
+       pop     ebp
+       ret
+
+%endif ; !USE_DEDICATED_H2V2_MERGED_UPSAMPLE_MMX
+
+%endif ; JDMERGE_MMX_SUPPORTED
+%endif ; UPSAMPLE_MERGING_SUPPORTED
+%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
diff --git a/jdmerss2.asm b/jdmerss2.asm
new file mode 100644 (file)
index 0000000..b6f51c8
--- /dev/null
@@ -0,0 +1,1272 @@
+;
+; jdmerss2.asm - merged upsampling/color conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
+%ifdef UPSAMPLE_MERGING_SUPPORTED
+%ifdef JDMERGE_SSE2_SUPPORTED
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS      16      ; fraction bits of the FIX() constants below: FIX(x) = x * 2^16, rounded
+
+F_0_344        equ      22554                  ; FIX(0.34414)
+F_0_714        equ      46802                  ; FIX(0.71414)
+F_1_402        equ      91881                  ; FIX(1.40200)
+F_1_772        equ     116130                  ; FIX(1.77200)
+F_0_402        equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285        equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228        equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_merged_upsample_sse2)
+
+EXTN(jconst_merged_upsample_sse2):
+
+PW_F0402       times 8 dw  F_0_402     ; 8 words of FIX(0.40200); pmulhw operand for 2*Cr
+PW_MF0228      times 8 dw -F_0_228     ; 8 words of -FIX(0.22800); pmulhw operand for 2*Cb
+PW_MF0344_F0285        times 4 dw -F_0_344, F_0_285    ; (-FIX(0.344), FIX(0.285)) pairs; pmaddwd operand for interleaved (Cb,Cr) words
+PW_ONE         times 8 dw  1           ; rounding term added before "psraw x,1"
+PD_ONEHALF     times 4 dd  1 << (SCALEBITS-1)  ; rounding term added before "psrad x,SCALEBITS"
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+;
+; GLOBAL(void)
+; jpeg_h2v1_merged_upsample_sse2 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)           (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)    (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)          (b)+20          ; JSAMPARRAY output_buf
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         3
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h2v1_merged_upsample_sse2)
+
+EXTN(jpeg_h2v1_merged_upsample_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]
+       pushpic eax             ; make a room for GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       mov     ecx, POINTER [cinfo(eax)]
+       mov     ecx, JDIMENSION [jdstruct_output_width(ecx)]    ; col
+       test    ecx,ecx
+       jz      near .return
+
+       push    ecx
+
+       mov     edi, JSAMPIMAGE [input_buf(eax)]
+       mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+       mov     edi, JSAMPARRAY [output_buf(eax)]
+       mov     esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]         ; inptr0
+       mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+       mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+       mov     edi, JSAMPROW [edi]                             ; outptr
+
+       pop     ecx                     ; col
+
+       alignx  16,7
+.columnloop:
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+       movdqa    xmm6, XMMWORD [ebx]   ; xmm6=Cb(0123456789ABCDEF)
+       movdqa    xmm7, XMMWORD [edx]   ; xmm7=Cr(0123456789ABCDEF)
+
+       pxor      xmm1,xmm1             ; xmm1=(all 0's)
+       pcmpeqw   xmm3,xmm3
+       psllw     xmm3,7                ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+       movdqa    xmm4,xmm6
+       punpckhbw xmm6,xmm1             ; xmm6=Cb(89ABCDEF)=CbH
+       punpcklbw xmm4,xmm1             ; xmm4=Cb(01234567)=CbL
+       movdqa    xmm0,xmm7
+       punpckhbw xmm7,xmm1             ; xmm7=Cr(89ABCDEF)=CrH
+       punpcklbw xmm0,xmm1             ; xmm0=Cr(01234567)=CrL
+
+       paddw     xmm6,xmm3
+       paddw     xmm4,xmm3
+       paddw     xmm7,xmm3
+       paddw     xmm0,xmm3
+
+       ; (Original)
+       ; R = Y                + 1.40200 * Cr
+       ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+       ; B = Y + 1.77200 * Cb
+       ;
+       ; (This implementation)
+       ; R = Y                + 0.40200 * Cr + Cr
+       ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+       ; B = Y - 0.22800 * Cb + Cb + Cb
+
+       movdqa  xmm5,xmm6               ; xmm5=CbH
+       movdqa  xmm2,xmm4               ; xmm2=CbL
+       paddw   xmm6,xmm6               ; xmm6=2*CbH
+       paddw   xmm4,xmm4               ; xmm4=2*CbL
+       movdqa  xmm1,xmm7               ; xmm1=CrH
+       movdqa  xmm3,xmm0               ; xmm3=CrL
+       paddw   xmm7,xmm7               ; xmm7=2*CrH
+       paddw   xmm0,xmm0               ; xmm0=2*CrL
+
+       pmulhw  xmm6,[GOTOFF(eax,PW_MF0228)]    ; xmm6=(2*CbH * -FIX(0.22800))
+       pmulhw  xmm4,[GOTOFF(eax,PW_MF0228)]    ; xmm4=(2*CbL * -FIX(0.22800))
+       pmulhw  xmm7,[GOTOFF(eax,PW_F0402)]     ; xmm7=(2*CrH * FIX(0.40200))
+       pmulhw  xmm0,[GOTOFF(eax,PW_F0402)]     ; xmm0=(2*CrL * FIX(0.40200))
+
+       paddw   xmm6,[GOTOFF(eax,PW_ONE)]
+       paddw   xmm4,[GOTOFF(eax,PW_ONE)]
+       psraw   xmm6,1                  ; xmm6=(CbH * -FIX(0.22800))
+       psraw   xmm4,1                  ; xmm4=(CbL * -FIX(0.22800))
+       paddw   xmm7,[GOTOFF(eax,PW_ONE)]
+       paddw   xmm0,[GOTOFF(eax,PW_ONE)]
+       psraw   xmm7,1                  ; xmm7=(CrH * FIX(0.40200))
+       psraw   xmm0,1                  ; xmm0=(CrL * FIX(0.40200))
+
+       paddw   xmm6,xmm5
+       paddw   xmm4,xmm2
+       paddw   xmm6,xmm5               ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
+       paddw   xmm4,xmm2               ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
+       paddw   xmm7,xmm1               ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
+       paddw   xmm0,xmm3               ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
+
+       movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=(B-Y)H
+       movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(R-Y)H
+
+       movdqa    xmm6,xmm5
+       movdqa    xmm7,xmm2
+       punpcklwd xmm5,xmm1
+       punpckhwd xmm6,xmm1
+       pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   xmm6,[GOTOFF(eax,PW_MF0344_F0285)]
+       punpcklwd xmm2,xmm3
+       punpckhwd xmm7,xmm3
+       pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   xmm7,[GOTOFF(eax,PW_MF0344_F0285)]
+
+       paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     xmm6,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     xmm5,SCALEBITS
+       psrad     xmm6,SCALEBITS
+       paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     xmm7,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     xmm2,SCALEBITS
+       psrad     xmm7,SCALEBITS
+
+       packssdw  xmm5,xmm6     ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+       packssdw  xmm2,xmm7     ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+       psubw     xmm5,xmm1     ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+       psubw     xmm2,xmm3     ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+       movdqa  XMMWORD [wk(2)], xmm5   ; wk(2)=(G-Y)H
+
+       mov     al,2                    ; Yctr
+       jmp     short .Yloop_1st
+       alignx  16,7
+
+.Yloop_2nd:
+       movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
+       movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
+       movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
+       alignx  16,7
+
+.Yloop_1st:
+       movdqa  xmm7, XMMWORD [esi]     ; xmm7=Y(0123456789ABCDEF)
+
+       pcmpeqw xmm6,xmm6
+       psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+       pand    xmm6,xmm7               ; xmm6=Y(02468ACE)=YE
+       psrlw   xmm7,BYTE_BIT           ; xmm7=Y(13579BDF)=YO
+
+       movdqa  xmm1,xmm0               ; xmm1=xmm0=(R-Y)(L/H)
+       movdqa  xmm3,xmm2               ; xmm3=xmm2=(G-Y)(L/H)
+       movdqa  xmm5,xmm4               ; xmm5=xmm4=(B-Y)(L/H)
+
+       paddw     xmm0,xmm6             ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
+       paddw     xmm1,xmm7             ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
+       packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+       packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
+
+       paddw     xmm2,xmm6             ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
+       paddw     xmm3,xmm7             ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
+       packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+       packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
+
+       paddw     xmm4,xmm6             ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
+       paddw     xmm5,xmm7             ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
+       packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+       packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+       ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+       ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+       ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+       ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+       punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+       punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+       punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+       movdqa    xmmG,xmmA
+       movdqa    xmmH,xmmA
+       punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+       punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+       psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+       psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+       movdqa    xmmC,xmmD
+       movdqa    xmmB,xmmD
+       punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+       punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+       psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+       movdqa    xmmF,xmmE
+       punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+       punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+       pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+       movdqa    xmmB,xmmE
+       punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+       punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+       punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+       pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+       movdqa    xmmB,xmmF
+       punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+       punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+       punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+       punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+       punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+       punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      short .column_st32
+
+       test    edi, SIZEOF_XMMWORD-1
+       jnz     short .out1
+       ; --(aligned)-------------------
+       movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+       movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+       movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+       jmp     short .out0
+.out1: ; --(unaligned)-----------------
+       pcmpeqb    xmmH,xmmH                    ; xmmH=(all 1's)
+       maskmovdqu xmmA,xmmH                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmH                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmF,xmmH                    ; movntdqu XMMWORD [edi], xmmF
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+.out0:
+       sub     ecx, byte SIZEOF_XMMWORD
+       jz      near .endcolumn
+
+       add     esi, byte SIZEOF_XMMWORD        ; inptr0
+       dec     al                      ; Yctr
+       jnz     near .Yloop_2nd
+
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+       add     edx, byte SIZEOF_XMMWORD        ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st32:
+       pcmpeqb xmmH,xmmH                       ; xmmH=(all 1's)
+       lea     ecx, [ecx+ecx*2]                ; imul ecx, RGB_PIXELSIZE
+       cmp     ecx, byte 2*SIZEOF_XMMWORD
+       jb      short .column_st16
+       maskmovdqu xmmA,xmmH                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmH                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmF
+       sub     ecx, byte 2*SIZEOF_XMMWORD
+       jmp     short .column_st15
+.column_st16:
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      short .column_st15
+       maskmovdqu xmmA,xmmH                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmD
+       sub     ecx, byte SIZEOF_XMMWORD
+.column_st15:
+       mov     eax,ecx
+       xor     ecx, byte 0x0F
+       shl     ecx, 2
+       movd    xmmB,ecx
+       psrlq   xmmH,4
+       pcmpeqb xmmE,xmmE
+       psrlq   xmmH,xmmB
+       psrlq   xmmE,xmmB
+       punpcklbw xmmE,xmmH
+       ; ----------------
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1
+       jz      short .adj0
+       add     eax,ecx
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; pslldq xmmA,ecx & pslldq xmmE,ecx
+       movdqa  xmmG,xmmA
+       movdqa  xmmC,xmmE
+       pslldq  xmmA, SIZEOF_XMMWORD/2
+       pslldq  xmmE, SIZEOF_XMMWORD/2
+       movd    xmmD,ecx
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1
+       movd    xmmF,ecx
+       psllq   xmmA,xmmF
+       psllq   xmmE,xmmF
+       jmp     short .adj0
+.adj1: neg     ecx
+       movd    xmmF,ecx
+       psrlq   xmmA,xmmF
+       psrlq   xmmE,xmmF
+       psllq   xmmG,xmmD
+       psllq   xmmC,xmmD
+       por     xmmA,xmmG
+       por     xmmE,xmmC
+.adj0: ; ----------------
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+       pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+       pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%else
+       pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+       pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%endif
+       ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+       ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+       ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+       ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+       punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+       punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+       punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+       punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+       movdqa    xmmC,xmmA
+       punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+       punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+       movdqa    xmmG,xmmB
+       punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+       punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+       movdqa    xmmD,xmmA
+       punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+       punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+       movdqa    xmmH,xmmC
+       punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+       punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      short .column_st32
+
+       test    edi, SIZEOF_XMMWORD-1
+       jnz     short .out1
+       ; --(aligned)-------------------
+       movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+       movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+       movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+       movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+       jmp     short .out0
+.out1: ; --(unaligned)-----------------
+       pcmpeqb    xmmE,xmmE                    ; xmmE=(all 1's)
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmE                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmC,xmmE                    ; movntdqu XMMWORD [edi], xmmC
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmH,xmmE                    ; movntdqu XMMWORD [edi], xmmH
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+.out0:
+       sub     ecx, byte SIZEOF_XMMWORD
+       jz      near .endcolumn
+
+       add     esi, byte SIZEOF_XMMWORD        ; inptr0
+       dec     al                      ; Yctr
+       jnz     near .Yloop_2nd
+
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+       add     edx, byte SIZEOF_XMMWORD        ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st32:
+       pcmpeqb xmmE,xmmE                       ; xmmE=(all 1's)
+       cmp     ecx, byte SIZEOF_XMMWORD/2
+       jb      short .column_st16
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       maskmovdqu xmmD,xmmE                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmC
+       movdqa  xmmD,xmmH
+       sub     ecx, byte SIZEOF_XMMWORD/2
+.column_st16:
+       cmp     ecx, byte SIZEOF_XMMWORD/4
+       jb      short .column_st15
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr
+       movdqa  xmmA,xmmD
+       sub     ecx, byte SIZEOF_XMMWORD/4
+.column_st15:
+       cmp     ecx, byte SIZEOF_XMMWORD/16
+       jb      short .endcolumn
+       mov     eax,ecx
+       xor     ecx, byte 0x03
+       inc     ecx
+       shl     ecx, 4
+       movd    xmmF,ecx
+       psrlq   xmmE,xmmF
+       punpcklbw xmmE,xmmE
+       ; ----------------
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1
+       jz      short .adj0
+       lea     eax, [ecx+eax*4]        ; RGB_PIXELSIZE
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; pslldq xmmA,ecx & pslldq xmmE,ecx
+       movdqa  xmmB,xmmA
+       movdqa  xmmG,xmmE
+       pslldq  xmmA, SIZEOF_XMMWORD/2
+       pslldq  xmmE, SIZEOF_XMMWORD/2
+       movd    xmmC,ecx
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1
+       movd    xmmH,ecx
+       psllq   xmmA,xmmH
+       psllq   xmmE,xmmH
+       jmp     short .adj0
+.adj1: neg     ecx
+       movd    xmmH,ecx
+       psrlq   xmmA,xmmH
+       psrlq   xmmE,xmmH
+       psllq   xmmB,xmmC
+       psllq   xmmG,xmmC
+       por     xmmA,xmmB
+       por     xmmE,xmmG
+.adj0: ; ----------------
+       maskmovdqu xmmA,xmmE                    ; movntdqu XMMWORD [edi], xmmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+       sfence          ; flush the write buffer
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%ifndef USE_DEDICATED_H2V2_MERGED_UPSAMPLE_SSE2
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jpeg_h2v2_merged_upsample_sse2 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+;
+; This (non-dedicated) variant contains no pixel code of its own: it calls
+; jpeg_h2v1_merged_upsample_sse2 twice, once per output row.  It builds a
+; temporary 3-entry component array plus a 4-dword argument list on the
+; stack, makes the first call, then patches the component-0 entry and the
+; output_buf argument in place (each advanced by one JSAMPROW slot) and
+; calls again with the same argument list.
+;
+; NOTE(review): the component-0 row array is pre-advanced by
+; in_row_group_ctr rows; presumably the callee indexes it by
+; in_row_group_ctr again, giving rows 2*ctr and 2*ctr+1 -- confirm against
+; jpeg_h2v1_merged_upsample_sse2.
+;
+; This code relies on the callee preserving ebx, esi and edi, and on the
+; callee leaving its stack argument slots unmodified between the two calls.
+;
+
+; Argument offsets relative to ebp after "push ebp / mov ebp,esp"
+; (saved ebp at +0, return address at +4, first argument at +8).
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)           (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)    (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)          (b)+20          ; JSAMPARRAY output_buf
+
+       align   16
+       global  EXTN(jpeg_h2v2_merged_upsample_sse2)
+
+EXTN(jpeg_h2v2_merged_upsample_sse2):
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       mov     eax, POINTER [cinfo(ebp)]       ; eax = cinfo (passed through unchanged)
+
+       mov     edi, JSAMPIMAGE [input_buf(ebp)]
+       mov     ecx, JDIMENSION [in_row_group_ctr(ebp)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]       ; esi = input_buf[0]
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]       ; ebx = input_buf[1]
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]       ; edx = input_buf[2]
+       mov     edi, JSAMPARRAY [output_buf(ebp)]
+       lea     esi, [esi+ecx*SIZEOF_JSAMPROW]  ; esi = &input_buf[0][in_row_group_ctr]
+
+       ; Build the temporary component array {esi, ebx, edx} on the stack;
+       ; entries are pushed in reverse so that entry 0 ends up at [esp].
+       push    edx                     ; inptr2
+       push    ebx                     ; inptr1
+       push    esi                     ; inptr00
+       mov     ebx,esp                 ; ebx = address of the temporary array
+
+       ; Argument list for the callee, pushed last-to-first.  The output_buf
+       ; slot lands at [ebx-1*SIZEOF_POINTER], directly below the array.
+       push    edi                     ; output_buf (outptr0)
+       push    ecx                     ; in_row_group_ctr
+       push    ebx                     ; input_buf
+       push    eax                     ; cinfo
+
+       call    near EXTN(jpeg_h2v1_merged_upsample_sse2)       ; first output row
+
+       ; The arguments are still on the stack (they are released only by the
+       ; "add esp" below); patch the two that differ for the second row.
+       add     esi, byte SIZEOF_JSAMPROW       ; inptr01
+       add     edi, byte SIZEOF_JSAMPROW       ; outptr1
+       mov     POINTER [ebx+0*SIZEOF_POINTER], esi     ; temporary array entry 0
+       mov     POINTER [ebx-1*SIZEOF_POINTER], edi     ; pushed output_buf argument
+
+       call    near EXTN(jpeg_h2v1_merged_upsample_sse2)       ; second output row
+
+       add     esp, byte 7*SIZEOF_DWORD        ; drop 4 arguments + 3 array entries
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%else  ; USE_DEDICATED_H2V2_MERGED_UPSAMPLE_SSE2
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jpeg_h2v2_merged_upsample_sse2 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define input_buf(b)           (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)    (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)          (b)+20          ; JSAMPARRAY output_buf
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         10
+%define inptr1         wk(0)-SIZEOF_JSAMPROW   ; JSAMPROW inptr1
+%define inptr2         inptr1-SIZEOF_JSAMPROW  ; JSAMPROW inptr2
+%define gotptr         inptr2-SIZEOF_POINTER   ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h2v2_merged_upsample_sse2)
+
+EXTN(jpeg_h2v2_merged_upsample_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [inptr2]
+       pushpic eax             ; make a room for GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       mov     ecx, POINTER [cinfo(eax)]
+       mov     ecx, JDIMENSION [jdstruct_output_width(ecx)]    ; col
+       test    ecx,ecx
+       jz      near .return
+
+       push    ecx
+
+       mov     edi, JSAMPIMAGE [input_buf(eax)]
+       mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+       mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+       mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+       mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+       mov     edi, JSAMPARRAY [output_buf(eax)]
+       mov     eax, JSAMPROW [esi+(ecx*2+0)*SIZEOF_JSAMPROW]   ; inptr00
+       mov     esi, JSAMPROW [esi+(ecx*2+1)*SIZEOF_JSAMPROW]   ; inptr01
+       mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+       mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+
+       pop     ecx             ; col
+       push    eax             ; inptr00
+       push    esi             ; inptr01
+
+       mov     esi, JSAMPROW [edi+0*SIZEOF_JSAMPROW]           ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]           ; outptr1
+       alignx  16,7
+.columnloop:
+       movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+       movdqa  xmm6, XMMWORD [ebx]     ; xmm6=Cb(0123456789ABCDEF)
+       movdqa  xmm7, XMMWORD [edx]     ; xmm7=Cr(0123456789ABCDEF)
+
+       mov     JSAMPROW [inptr1], ebx  ; inptr1
+       mov     JSAMPROW [inptr2], edx  ; inptr2
+       pop     edx                     ; edx=inptr01
+       pop     ebx                     ; ebx=inptr00
+
+       pxor      xmm1,xmm1             ; xmm1=(all 0's)
+       pcmpeqw   xmm3,xmm3
+       psllw     xmm3,7                ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+       movdqa    xmm4,xmm6
+       punpckhbw xmm6,xmm1             ; xmm6=Cb(89ABCDEF)=CbH
+       punpcklbw xmm4,xmm1             ; xmm4=Cb(01234567)=CbL
+       movdqa    xmm0,xmm7
+       punpckhbw xmm7,xmm1             ; xmm7=Cr(89ABCDEF)=CrH
+       punpcklbw xmm0,xmm1             ; xmm0=Cr(01234567)=CrL
+
+       paddw     xmm6,xmm3
+       paddw     xmm4,xmm3
+       paddw     xmm7,xmm3
+       paddw     xmm0,xmm3
+
+       ; (Original)
+       ; R = Y                + 1.40200 * Cr
+       ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+       ; B = Y + 1.77200 * Cb
+       ;
+       ; (This implementation)
+       ; R = Y                + 0.40200 * Cr + Cr
+       ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+       ; B = Y - 0.22800 * Cb + Cb + Cb
+
+       movdqa  xmm5,xmm6               ; xmm5=CbH
+       movdqa  xmm2,xmm4               ; xmm2=CbL
+       paddw   xmm6,xmm6               ; xmm6=2*CbH
+       paddw   xmm4,xmm4               ; xmm4=2*CbL
+       movdqa  xmm1,xmm7               ; xmm1=CrH
+       movdqa  xmm3,xmm0               ; xmm3=CrL
+       paddw   xmm7,xmm7               ; xmm7=2*CrH
+       paddw   xmm0,xmm0               ; xmm0=2*CrL
+
+       pmulhw  xmm6,[GOTOFF(eax,PW_MF0228)]    ; xmm6=(2*CbH * -FIX(0.22800))
+       pmulhw  xmm4,[GOTOFF(eax,PW_MF0228)]    ; xmm4=(2*CbL * -FIX(0.22800))
+       pmulhw  xmm7,[GOTOFF(eax,PW_F0402)]     ; xmm7=(2*CrH * FIX(0.40200))
+       pmulhw  xmm0,[GOTOFF(eax,PW_F0402)]     ; xmm0=(2*CrL * FIX(0.40200))
+
+       paddw   xmm6,[GOTOFF(eax,PW_ONE)]
+       paddw   xmm4,[GOTOFF(eax,PW_ONE)]
+       psraw   xmm6,1                  ; xmm6=(CbH * -FIX(0.22800))
+       psraw   xmm4,1                  ; xmm4=(CbL * -FIX(0.22800))
+       paddw   xmm7,[GOTOFF(eax,PW_ONE)]
+       paddw   xmm0,[GOTOFF(eax,PW_ONE)]
+       psraw   xmm7,1                  ; xmm7=(CrH * FIX(0.40200))
+       psraw   xmm0,1                  ; xmm0=(CrL * FIX(0.40200))
+
+       paddw   xmm6,xmm5
+       paddw   xmm4,xmm2
+       paddw   xmm6,xmm5               ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
+       paddw   xmm4,xmm2               ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
+       paddw   xmm7,xmm1               ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
+       paddw   xmm0,xmm3               ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
+
+       movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=(B-Y)H
+       movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(R-Y)H
+
+       movdqa    xmm6,xmm5
+       movdqa    xmm7,xmm2
+       punpcklwd xmm5,xmm1
+       punpckhwd xmm6,xmm1
+       pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   xmm6,[GOTOFF(eax,PW_MF0344_F0285)]
+       punpcklwd xmm2,xmm3
+       punpckhwd xmm7,xmm3
+       pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
+       pmaddwd   xmm7,[GOTOFF(eax,PW_MF0344_F0285)]
+
+       paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     xmm6,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     xmm5,SCALEBITS
+       psrad     xmm6,SCALEBITS
+       paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
+       paddd     xmm7,[GOTOFF(eax,PD_ONEHALF)]
+       psrad     xmm2,SCALEBITS
+       psrad     xmm7,SCALEBITS
+
+       packssdw  xmm5,xmm6     ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+       packssdw  xmm2,xmm7     ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+       psubw     xmm5,xmm1     ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+       psubw     xmm2,xmm3     ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+       movdqa  XMMWORD [wk(2)], xmm5   ; wk(2)=(G-Y)H
+
+       mov     ah,2                    ; YHctr
+       jmp     short .YHloop_1st
+       alignx  16,7
+
+.YHloop_2nd:
+       movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
+       movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
+       movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
+       alignx  16,7
+
+.YHloop_1st:
+       movdqa  XMMWORD [wk(3)], xmm0   ; wk(3)=(R-Y)(L/H)
+       movdqa  XMMWORD [wk(4)], xmm2   ; wk(4)=(G-Y)(L/H)
+       movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=(B-Y)(L/H)
+
+       movdqa  xmm7, XMMWORD [ebx]     ; xmm7=Y(0123456789ABCDEF)
+
+       mov     al,2                    ; YVctr
+       jmp     short .YVloop_1st
+       alignx  16,7
+
+.YVloop_2nd:
+       movdqa  xmm0, XMMWORD [wk(3)]   ; xmm0=(R-Y)(L/H)
+       movdqa  xmm2, XMMWORD [wk(4)]   ; xmm2=(G-Y)(L/H)
+       movdqa  xmm4, XMMWORD [wk(5)]   ; xmm4=(B-Y)(L/H)
+
+       movdqa  xmm7, XMMWORD [edx]     ; xmm7=Y(0123456789ABCDEF)
+       alignx  16,7
+
+.YVloop_1st:
+       pcmpeqw xmm6,xmm6
+       psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+       pand    xmm6,xmm7               ; xmm6=Y(02468ACE)=YE
+       psrlw   xmm7,BYTE_BIT           ; xmm7=Y(13579BDF)=YO
+
+       movdqa  xmm1,xmm0               ; xmm1=xmm0=(R-Y)(L/H)
+       movdqa  xmm3,xmm2               ; xmm3=xmm2=(G-Y)(L/H)
+       movdqa  xmm5,xmm4               ; xmm5=xmm4=(B-Y)(L/H)
+
+       paddw     xmm0,xmm6             ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
+       paddw     xmm1,xmm7             ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
+       packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+       packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
+
+       paddw     xmm2,xmm6             ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
+       paddw     xmm3,xmm7             ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
+       packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+       packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
+
+       paddw     xmm4,xmm6             ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
+       paddw     xmm5,xmm7             ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
+       packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+       packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+       ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+       ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+       ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+       ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+       punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+       punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+       punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+       movdqa    xmmG,xmmA
+       movdqa    xmmH,xmmA
+       punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+       punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+       psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+       psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+       movdqa    xmmC,xmmD
+       movdqa    xmmB,xmmD
+       punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+       punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+       psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+       movdqa    xmmF,xmmE
+       punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+       punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+       pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+       movdqa    xmmB,xmmE
+       punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+       punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+       punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+       pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+       movdqa    xmmB,xmmF
+       punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+       punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+       punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+       punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+       punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+       punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+       dec     al                      ; YVctr
+       jz      short .YVloop_break
+
+       movdqa  XMMWORD [wk(6)], xmmA
+       movdqa  XMMWORD [wk(7)], xmmD
+       movdqa  XMMWORD [wk(8)], xmmF
+
+       jmp     near .YVloop_2nd
+       alignx  16,7
+
+.YVloop_break:
+       movdqa  xmmH, XMMWORD [wk(6)]
+       movdqa  xmmB, XMMWORD [wk(7)]
+       movdqa  xmmE, XMMWORD [wk(8)]
+
+       pcmpeqb xmmG,xmmG       ; xmmG=(all 1's)
+
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      near .column_st32
+
+       test    edi, SIZEOF_XMMWORD-1
+       jnz     short .out01
+       ; --(aligned)-------------------
+       movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+       movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+       movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr1
+       jmp     short .out00
+.out01:        ; --(unaligned)-----------------
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       maskmovdqu xmmD,xmmG                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       maskmovdqu xmmF,xmmG                    ; movntdqu XMMWORD [edi], xmmF
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+.out00:
+       test    esi, SIZEOF_XMMWORD-1
+       jnz     short .out11
+       ; --(aligned)-------------------
+       movntdq XMMWORD [esi+0*SIZEOF_XMMWORD], xmmH
+       movntdq XMMWORD [esi+1*SIZEOF_XMMWORD], xmmB
+       movntdq XMMWORD [esi+2*SIZEOF_XMMWORD], xmmE
+       add     esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr0
+       jmp     short .out10
+.out11:        ; --(unaligned)-----------------
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       maskmovdqu xmmH,xmmG                    ; movntdqu XMMWORD [edi], xmmH
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       maskmovdqu xmmB,xmmG                    ; movntdqu XMMWORD [edi], xmmB
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       maskmovdqu xmmE,xmmG                    ; movntdqu XMMWORD [edi], xmmE
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       xchg    edi,esi                         ; edi=outptr1, esi=outptr0
+.out10:
+       sub     ecx, byte SIZEOF_XMMWORD
+       jz      near .endcolumn
+
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr00
+       add     edx, byte SIZEOF_XMMWORD        ; inptr01
+       dec     ah                      ; YHctr
+       jnz     near .YHloop_2nd
+
+       push    ebx                             ; inptr00
+       push    edx                             ; inptr01
+       mov     ebx, JSAMPROW [inptr1]          ; ebx=inptr1
+       mov     edx, JSAMPROW [inptr2]          ; edx=inptr2
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+       add     edx, byte SIZEOF_XMMWORD        ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st32:
+       lea     ecx, [ecx+ecx*2]                ; imul ecx, RGB_PIXELSIZE
+       cmp     ecx, byte 2*SIZEOF_XMMWORD
+       jb      short .column_st16
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       maskmovdqu xmmD,xmmG                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       maskmovdqu xmmH,xmmG                    ; movntdqu XMMWORD [edi], xmmH
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       maskmovdqu xmmB,xmmG                    ; movntdqu XMMWORD [edi], xmmB
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       xchg    edi,esi                         ; edi=outptr1, esi=outptr0
+       movdqa  xmmA,xmmF
+       movdqa  xmmH,xmmE
+       sub     ecx, byte 2*SIZEOF_XMMWORD
+       jmp     short .column_st15
+.column_st16:
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      short .column_st15
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       maskmovdqu xmmH,xmmG                    ; movntdqu XMMWORD [edi], xmmH
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       xchg    edi,esi                         ; edi=outptr1, esi=outptr0
+       movdqa  xmmA,xmmD
+       movdqa  xmmH,xmmB
+       sub     ecx, byte SIZEOF_XMMWORD
+.column_st15:
+       mov     edx,ecx
+       xor     ecx, byte 0x0F
+       shl     ecx, 2
+       movd    xmmC,ecx
+       psrlq   xmmG,4
+       pcmpeqb xmmD,xmmD
+       psrlq   xmmG,xmmC
+       psrlq   xmmD,xmmC
+       punpcklbw xmmD,xmmG
+       movdqa    xmmB,xmmD
+       ; ================
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1
+       jz      short .adj0a
+       lea     eax, [ecx+edx]
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0a
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; pslldq xmmA,ecx & pslldq xmmD,ecx
+       movdqa  xmmF,xmmA
+       movdqa  xmmE,xmmD
+       pslldq  xmmA, SIZEOF_XMMWORD/2
+       pslldq  xmmD, SIZEOF_XMMWORD/2
+       movd    xmmC,ecx
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1a
+       movd    xmmG,ecx
+       psllq   xmmA,xmmG
+       psllq   xmmD,xmmG
+       jmp     short .adj0a
+.adj1a:        neg     ecx
+       movd    xmmG,ecx
+       psrlq   xmmA,xmmG
+       psrlq   xmmD,xmmG
+       psllq   xmmF,xmmC
+       psllq   xmmE,xmmC
+       por     xmmA,xmmF
+       por     xmmD,xmmE
+.adj0a:        ; ----------------
+       maskmovdqu xmmA,xmmD                    ; movntdqu XMMWORD [edi], xmmA
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       ; ================
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1
+       jz      short .adj0b
+       lea     eax, [ecx+edx]
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0b
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; pslldq xmmH,ecx & pslldq xmmB,ecx
+       movdqa  xmmG,xmmH
+       movdqa  xmmC,xmmB
+       pslldq  xmmH, SIZEOF_XMMWORD/2
+       pslldq  xmmB, SIZEOF_XMMWORD/2
+       movd    xmmF,ecx
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1b
+       movd    xmmE,ecx
+       psllq   xmmH,xmmE
+       psllq   xmmB,xmmE
+       jmp     short .adj0b
+.adj1b:        neg     ecx
+       movd    xmmE,ecx
+       psrlq   xmmH,xmmE
+       psrlq   xmmB,xmmE
+       psllq   xmmG,xmmF
+       psllq   xmmC,xmmF
+       por     xmmH,xmmG
+       por     xmmB,xmmC
+.adj0b:        ; ----------------
+       maskmovdqu xmmH,xmmB                    ; movntdqu XMMWORD [edi], xmmH
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+       pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+       pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%else
+       pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+       pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%endif
+       ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+       ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+       ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+       ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+       punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+       punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+       punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+       punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+       movdqa    xmmC,xmmA
+       punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+       punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+       movdqa    xmmG,xmmB
+       punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+       punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+       movdqa    xmmD,xmmA
+       punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+       punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+       movdqa    xmmH,xmmC
+       punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+       punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+       dec     al                      ; YVctr
+       jz      short .YVloop_break
+
+       movdqa  XMMWORD [wk(6)], xmmA
+       movdqa  XMMWORD [wk(7)], xmmD
+       movdqa  XMMWORD [wk(8)], xmmC
+       movdqa  XMMWORD [wk(9)], xmmH
+
+       jmp     near .YVloop_2nd
+       alignx  16,7
+
+.YVloop_break:
+       movdqa  xmmE, XMMWORD [wk(6)]
+       movdqa  xmmF, XMMWORD [wk(7)]
+       movdqa  xmmB, XMMWORD [wk(8)]
+
+       pcmpeqb xmmG,xmmG       ; xmmG=(all 1's)
+
+       cmp     ecx, byte SIZEOF_XMMWORD
+       jb      near .column_st32
+
+       test    edi, SIZEOF_XMMWORD-1
+       jnz     short .out01
+       ; --(aligned)-------------------
+       movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+       movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+       movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+       movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+       add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr1
+       jmp     short .out00
+.out01:        ; --(unaligned)-----------------
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       maskmovdqu xmmD,xmmG                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       maskmovdqu xmmC,xmmG                    ; movntdqu XMMWORD [edi], xmmC
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       maskmovdqu xmmH,xmmG                    ; movntdqu XMMWORD [edi], xmmH
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+.out00:
+       movdqa  xmmA, XMMWORD [wk(9)]
+
+       test    esi, SIZEOF_XMMWORD-1
+       jnz     short .out11
+       ; --(aligned)-------------------
+       movntdq XMMWORD [esi+0*SIZEOF_XMMWORD], xmmE
+       movntdq XMMWORD [esi+1*SIZEOF_XMMWORD], xmmF
+       movntdq XMMWORD [esi+2*SIZEOF_XMMWORD], xmmB
+       movntdq XMMWORD [esi+3*SIZEOF_XMMWORD], xmmA
+       add     esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr0
+       jmp     short .out10
+.out11:        ; --(unaligned)-----------------
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       maskmovdqu xmmE,xmmG                    ; movntdqu XMMWORD [edi], xmmE
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       maskmovdqu xmmF,xmmG                    ; movntdqu XMMWORD [edi], xmmF
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       maskmovdqu xmmB,xmmG                    ; movntdqu XMMWORD [edi], xmmB
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       xchg    edi,esi                         ; edi=outptr1, esi=outptr0
+.out10:
+       sub     ecx, byte SIZEOF_XMMWORD
+       jz      near .endcolumn
+
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr00
+       add     edx, byte SIZEOF_XMMWORD        ; inptr01
+       dec     ah                      ; YHctr
+       jnz     near .YHloop_2nd
+
+       push    ebx                             ; inptr00
+       push    edx                             ; inptr01
+       mov     ebx, JSAMPROW [inptr1]          ; ebx=inptr1
+       mov     edx, JSAMPROW [inptr2]          ; edx=inptr2
+       add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+       add     edx, byte SIZEOF_XMMWORD        ; inptr2
+       jmp     near .columnloop
+       alignx  16,7
+
+.column_st32:
+       cmp     ecx, byte SIZEOF_XMMWORD/2
+       jb      short .column_st16
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       maskmovdqu xmmD,xmmG                    ; movntdqu XMMWORD [edi], xmmD
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       maskmovdqu xmmE,xmmG                    ; movntdqu XMMWORD [edi], xmmE
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       maskmovdqu xmmF,xmmG                    ; movntdqu XMMWORD [edi], xmmF
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       xchg    edi,esi                         ; edi=outptr1, esi=outptr0
+       movdqa  xmmA,xmmC
+       movdqa  xmmD,xmmH
+       movdqa  xmmE,xmmB
+       movdqa  xmmF, XMMWORD [wk(9)]
+       sub     ecx, byte SIZEOF_XMMWORD/2
+.column_st16:
+       cmp     ecx, byte SIZEOF_XMMWORD/4
+       jb      short .column_st15
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       add     edi, byte SIZEOF_XMMWORD        ; outptr1
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       maskmovdqu xmmE,xmmG                    ; movntdqu XMMWORD [edi], xmmE
+       add     edi, byte SIZEOF_XMMWORD        ; outptr0
+       xchg    edi,esi                         ; edi=outptr1, esi=outptr0
+       movdqa  xmmA,xmmD
+       movdqa  xmmE,xmmF
+       sub     ecx, byte SIZEOF_XMMWORD/4
+.column_st15:
+       cmp     ecx, byte SIZEOF_XMMWORD/16
+       jb      near .endcolumn
+       mov     edx,ecx
+       xor     ecx, byte 0x03
+       inc     ecx
+       shl     ecx, 4
+       movd    xmmC,ecx
+       psrlq   xmmG,xmmC
+       punpcklbw xmmG,xmmG
+       movdqa    xmmH,xmmG
+       ; ================
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1
+       jz      short .adj0a
+       lea     eax, [ecx+edx*4]        ; RGB_PIXELSIZE
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0a
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; pslldq xmmA,ecx & pslldq xmmG,ecx
+       movdqa  xmmB,xmmA
+       movdqa  xmmD,xmmG
+       pslldq  xmmA, SIZEOF_XMMWORD/2
+       pslldq  xmmG, SIZEOF_XMMWORD/2
+       movd    xmmF,ecx
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1a
+       movd    xmmC,ecx
+       psllq   xmmA,xmmC
+       psllq   xmmG,xmmC
+       jmp     short .adj0a
+.adj1a:        neg     ecx
+       movd    xmmC,ecx
+       psrlq   xmmA,xmmC
+       psrlq   xmmG,xmmC
+       psllq   xmmB,xmmF
+       psllq   xmmD,xmmF
+       por     xmmA,xmmB
+       por     xmmG,xmmD
+.adj0a:        ; ----------------
+       maskmovdqu xmmA,xmmG                    ; movntdqu XMMWORD [edi], xmmA
+       xchg    edi,esi                         ; edi=outptr0, esi=outptr1
+       ; ================
+       mov     ecx,edi
+       and     ecx, byte SIZEOF_XMMWORD-1
+       jz      short .adj0b
+       lea     eax, [ecx+edx*4]        ; RGB_PIXELSIZE
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .adj0b
+       and     edi, byte (-SIZEOF_XMMWORD)     ; align to 16-byte boundary
+       shl     ecx, 3                  ; pslldq xmmE,ecx & pslldq xmmH,ecx
+       movdqa  xmmC,xmmE
+       movdqa  xmmF,xmmH
+       pslldq  xmmE, SIZEOF_XMMWORD/2
+       pslldq  xmmH, SIZEOF_XMMWORD/2
+       movd    xmmB,ecx
+       sub     ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
+       jb      short .adj1b
+       movd    xmmD,ecx
+       psllq   xmmE,xmmD
+       psllq   xmmH,xmmD
+       jmp     short .adj0b
+.adj1b:        neg     ecx
+       movd    xmmD,ecx
+       psrlq   xmmE,xmmD
+       psrlq   xmmH,xmmD
+       psllq   xmmC,xmmB
+       psllq   xmmF,xmmB
+       por     xmmE,xmmC
+       por     xmmH,xmmF
+.adj0b:        ; ----------------
+       maskmovdqu xmmE,xmmH                    ; movntdqu XMMWORD [edi], xmmE
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+       sfence          ; flush the write buffer
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; !USE_DEDICATED_H2V2_MERGED_UPSAMPLE_SSE2
+
+%endif ; JDMERGE_SSE2_SUPPORTED
+%endif ; UPSAMPLE_MERGING_SUPPORTED
+%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4
index 22678099451a7f606c5cb2652940d569ba7885d5..a1d92b7672dc954c2f3536397ffb27c71c52afe3 100644 (file)
--- a/jdphuff.c
+++ b/jdphuff.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified to improve performance.
+ * Last Modified : October 31, 2004
+ * ---------------------------------------------------------------------
+ *
  * This file contains Huffman entropy decoding routines for progressive JPEG.
  *
  * Much of the complexity here has to do with supporting input suspension.
@@ -69,6 +76,7 @@ typedef struct {
   d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
 
   d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
+  d_derived_tbl * dc_derived_tbls[MAX_COMPS_IN_SCAN];
 } phuff_entropy_decoder;
 
 typedef phuff_entropy_decoder * phuff_entropy_ptr;
@@ -168,6 +176,7 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo)
        tbl = compptr->dc_tbl_no;
        jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
                                & entropy->derived_tbls[tbl]);
+       entropy->dc_derived_tbls[ci] = entropy->derived_tbls[tbl];
       }
     } else {
       tbl = compptr->ac_tbl_no;
@@ -193,32 +202,6 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo)
 }
 
 
-/*
- * Figure F.12: extend sign bit.
- * On some machines, a shift and add will be faster than a table lookup.
- */
-
-#ifdef AVOID_TABLES
-
-#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
-
-#else
-
-#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
-
-static const int extend_test[16] =   /* entry n is 2**(n-1) */
-  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
-    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
-
-static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
-  { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
-    ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
-    ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
-    ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
-
-#endif /* AVOID_TABLES */
-
-
 /*
  * Check for a restart marker & resynchronize decoder.
  * Returns FALSE if must suspend.
@@ -284,16 +267,12 @@ process_restart (j_decompress_ptr cinfo)
 
 METHODDEF(boolean)
 decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Al = cinfo->Al;
-  register int s, r;
-  int blkn, ci;
-  JBLOCKROW block;
+  int blkn;
   BITREAD_STATE_VARS;
   savable_state state;
-  d_derived_tbl * tbl;
-  jpeg_component_info * compptr;
 
   /* Process restart marker if needed; may have to suspend */
   if (cinfo->restart_interval) {
@@ -314,21 +293,67 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
     /* Outer loop handles each block in the MCU */
 
     for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      block = MCU_data[blkn];
-      ci = cinfo->MCU_membership[blkn];
-      compptr = cinfo->cur_comp_info[ci];
-      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+      JBLOCKROW block = MCU_data[blkn];
+      int ci = cinfo->MCU_membership[blkn];
+      d_derived_tbl * tbl = entropy->dc_derived_tbls[ci];
+      register int s;
 
       /* Decode a single block's worth of coefficients */
 
       /* Section F.2.2.1: decode the DC coefficient difference */
-      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
-      if (s) {
-       CHECK_BIT_BUFFER(br_state, s, return FALSE);
-       r = GET_BITS(s);
-       s = HUFF_EXTEND(r, s);
+      {                /* HUFFX_DECODE */
+       register int nb, look, t;
+       if (bits_left < HUFFX_LOOKAHEAD) {
+         register const JOCTET * next_input_byte = br_state.next_input_byte;
+         register size_t         bytes_in_buffer = br_state.bytes_in_buffer;
+         if (cinfo->unread_marker == 0) {
+           while (bits_left < MIN_GET_BITS) {
+             register int c;
+             if (bytes_in_buffer == 0 ||
+                 (c = GETJOCTET(*next_input_byte)) == 0xFF) {
+               goto label11; }
+             bytes_in_buffer--; next_input_byte++;
+             get_buffer = (get_buffer << 8) | c;
+             bits_left += 8;
+           }
+           br_state.next_input_byte = next_input_byte;
+           br_state.bytes_in_buffer = bytes_in_buffer;
+         } else {
+       label11:
+           br_state.next_input_byte = next_input_byte;
+           br_state.bytes_in_buffer = bytes_in_buffer;
+           if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) {
+             return FALSE; }
+           get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+           if (bits_left < HUFFX_LOOKAHEAD) {
+             nb = 1; goto label1;
+           }
+         }
+       }
+       look = PEEK_BITS(HUFFX_LOOKAHEAD);
+       if ((nb = tbl->lookx_nbits[look]) != 0) {
+         s = tbl->lookx_val[look];
+         if (nb <= HUFFX_LOOKAHEAD) {
+           DROP_BITS(nb);
+         } else {
+           DROP_BITS(HUFFX_LOOKAHEAD);
+           nb -= HUFFX_LOOKAHEAD;
+           CHECK_BIT_BUFFER(br_state, nb, return FALSE);
+           s += GET_BITS(nb);
+         }
+       } else {
+         nb = HUFFX_LOOKAHEAD;
+      label1:
+         if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,tbl,nb))
+              < 0) { return FALSE; }
+         get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+         if (s) {
+           CHECK_BIT_BUFFER(br_state, s, return FALSE);
+           t = GET_BITS(s);
+           s = HUFF_EXTEND(t, s);
+         }
+       }
       }
-
       /* Convert DC difference to actual value, update last_dc_val */
       s += state.last_dc_val[ci];
       state.last_dc_val[ci] = s;
@@ -355,15 +380,12 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
 METHODDEF(boolean)
 decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Se = cinfo->Se;
   int Al = cinfo->Al;
-  register int s, k, r;
   unsigned int EOBRUN;
-  JBLOCKROW block;
   BITREAD_STATE_VARS;
-  d_derived_tbl * tbl;
 
   /* Process restart marker if needed; may have to suspend */
   if (cinfo->restart_interval) {
@@ -384,22 +406,74 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
     /* There is always only one block per MCU */
 
-    if (EOBRUN > 0)            /* if it's a band of zeroes... */
+    if (EOBRUN > 0) {          /* if it's a band of zeroes... */
       EOBRUN--;                        /* ...process it now (we do nothing) */
-    else {
+    } else {
+      JBLOCKROW block = MCU_data[0];
+      d_derived_tbl * tbl = entropy->ac_derived_tbl;
+      register int s, k, r;
+
+      /* Load up working state */
       BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-      block = MCU_data[0];
-      tbl = entropy->ac_derived_tbl;
 
       for (k = cinfo->Ss; k <= Se; k++) {
-       HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
-       r = s >> 4;
-       s &= 15;
+       {       /* HUFFX_DECODE */
+         register int nb, look, t;
+         if (bits_left < HUFFX_LOOKAHEAD) {
+           register const JOCTET * next_input_byte = br_state.next_input_byte;
+           register size_t         bytes_in_buffer = br_state.bytes_in_buffer;
+           if (cinfo->unread_marker == 0) {
+             while (bits_left < MIN_GET_BITS) {
+               register int c;
+               if (bytes_in_buffer == 0 ||
+                   (c = GETJOCTET(*next_input_byte)) == 0xFF) {
+                 goto label21; }
+               bytes_in_buffer--; next_input_byte++;
+               get_buffer = (get_buffer << 8) | c;
+               bits_left += 8;
+             }
+             br_state.next_input_byte = next_input_byte;
+             br_state.bytes_in_buffer = bytes_in_buffer;
+           } else {
+         label21:
+             br_state.next_input_byte = next_input_byte;
+             br_state.bytes_in_buffer = bytes_in_buffer;
+             if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) {
+               return FALSE; }
+             get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+             if (bits_left < HUFFX_LOOKAHEAD) {
+               nb = 1; goto label2;
+             }
+           }
+         }
+         look = PEEK_BITS(HUFFX_LOOKAHEAD);
+         if ((nb = tbl->lookx_nbits[look]) != 0) {
+           s = tbl->lookx_val[look];
+           r = tbl->lookx_sym[look] >> 4;
+           if (nb <= HUFFX_LOOKAHEAD) {
+             DROP_BITS(nb);
+           } else {
+             DROP_BITS(HUFFX_LOOKAHEAD);
+             nb -= HUFFX_LOOKAHEAD;
+             CHECK_BIT_BUFFER(br_state, nb, return FALSE);
+             s += GET_BITS(nb);
+           }
+         } else {
+           nb = HUFFX_LOOKAHEAD;
+       label2:
+           if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,tbl,nb))
+                < 0) { return FALSE; }
+           get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+           r = s >> 4; s &= 15;
+           if (s) {
+             CHECK_BIT_BUFFER(br_state, s, return FALSE);
+             t = GET_BITS(s);
+             s = HUFF_EXTEND(t, s);
+           }
+         }
+       }
        if (s) {
          k += r;
-         CHECK_BIT_BUFFER(br_state, s, return FALSE);
-         r = GET_BITS(s);
-         s = HUFF_EXTEND(r, s);
          /* Scale and output coefficient in natural (dezigzagged) order */
          (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al);
        } else {
@@ -440,11 +514,10 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
 METHODDEF(boolean)
 decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int p1 = 1 << cinfo->Al;     /* 1 in the bit position being coded */
   int blkn;
-  JBLOCKROW block;
   BITREAD_STATE_VARS;
 
   /* Process restart marker if needed; may have to suspend */
@@ -464,7 +537,7 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   /* Outer loop handles each block in the MCU */
 
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
+    JBLOCKROW block = MCU_data[blkn];
 
     /* Encoded data is simply the next bit of the two's-complement DC value */
     CHECK_BIT_BUFFER(br_state, 1, return FALSE);
@@ -489,17 +562,17 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
 METHODDEF(boolean)
 decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Se = cinfo->Se;
-  int p1 = 1 << cinfo->Al;     /* 1 in the bit position being coded */
-  int m1 = (-1) << cinfo->Al;  /* -1 in the bit position being coded */
+  int Al = cinfo->Al;
   register int s, k, r;
   unsigned int EOBRUN;
   JBLOCKROW block;
   JCOEFPTR thiscoef;
   BITREAD_STATE_VARS;
   d_derived_tbl * tbl;
+  int pm1[2];
   int num_newnz;
   int newnz_pos[DCTSIZE2];
 
@@ -522,6 +595,13 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
     block = MCU_data[0];
     tbl = entropy->ac_derived_tbl;
 
+    /* The pm1[] array is indexed by the result (0 or 1) of a relational
+     * operator.  This eliminates conditional branches that depend on random
+     * data, which perform poorly on recent processors.
+     */
+    pm1[0] =   1  << cinfo->Al;        /* +1 in the bit position being coded */
+    pm1[1] = (-1) << cinfo->Al;        /* -1 in the bit position being coded */
+
     /* If we are forced to suspend, we must undo the assignments to any newly
      * nonzero coefficients in the block, because otherwise we'd get confused
      * next time about which coefficients were already nonzero.
@@ -535,18 +615,63 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
     if (EOBRUN == 0) {
       for (; k <= Se; k++) {
-       HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
-       r = s >> 4;
-       s &= 15;
-       if (s) {
-         if (s != 1)           /* size of new coef should always be 1 */
-           WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
-         CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-         if (GET_BITS(1))
-           s = p1;             /* newly nonzero coef is positive */
-         else
-           s = m1;             /* newly nonzero coef is negative */
-       } else {
+       {       /* HUFFX_DECODE */
+         register int nb, look, t;
+         if (bits_left < HUFFX_LOOKAHEAD) {
+           register const JOCTET * next_input_byte = br_state.next_input_byte;
+           register size_t         bytes_in_buffer = br_state.bytes_in_buffer;
+           if (cinfo->unread_marker == 0) {
+             while (bits_left < MIN_GET_BITS) {
+               register int c;
+               if (bytes_in_buffer == 0 ||
+                   (c = GETJOCTET(*next_input_byte)) == 0xFF) {
+                 goto label31; }
+               bytes_in_buffer--; next_input_byte++;
+               get_buffer = (get_buffer << 8) | c;
+               bits_left += 8;
+             }
+             br_state.next_input_byte = next_input_byte;
+             br_state.bytes_in_buffer = bytes_in_buffer;
+           } else {
+         label31:
+             br_state.next_input_byte = next_input_byte;
+             br_state.bytes_in_buffer = bytes_in_buffer;
+             if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) {
+               goto undoit; }
+             get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+             if (bits_left < HUFFX_LOOKAHEAD) {
+               nb = 1; goto label3;
+             }
+           }
+         }
+         look = PEEK_BITS(HUFFX_LOOKAHEAD);
+         if ((nb = tbl->lookx_nbits[look]) != 0) {
+           t = tbl->lookx_sym[look];
+           s = tbl->lookx_val[look];
+           r = t >> 4; t &= 15;
+           if (t <= 1) {
+             DROP_BITS(nb);
+           } else {              /* size of new coef should always be 1 */
+             WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+             DROP_BITS(nb - (t - 1));
+             s = (s >= 0) ? 1 : -1;
+           }
+         } else {
+           nb = HUFFX_LOOKAHEAD;
+       label3:
+           if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,tbl,nb))
+                < 0) { goto undoit; }
+           get_buffer = br_state.get_buffer; bits_left = br_state.bits_left;
+           r = s >> 4; s &= 15;
+           if (s) {
+             if (s != 1)           /* size of new coef should always be 1 */
+               WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+             CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+             s = GET_BITS(1) ? 1 : -1;
+           }
+         }
+       }
+       if (s == 0) {
          if (r != 15) {
            EOBRUN = 1 << r;    /* EOBr, run length is 2^r + appended bits */
            if (r) {
@@ -567,12 +692,8 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
          if (*thiscoef != 0) {
            CHECK_BIT_BUFFER(br_state, 1, goto undoit);
            if (GET_BITS(1)) {
-             if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
-               if (*thiscoef >= 0)
-                 *thiscoef += p1;
-               else
-                 *thiscoef += m1;
-             }
+             if ((*thiscoef & pm1[0]) == 0) /* do nothing if already set it */
+               *thiscoef += pm1[(*thiscoef < 0)];
            }
          } else {
            if (--r < 0)
@@ -583,7 +704,7 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
        if (s) {
          int pos = jpeg_natural_order[k];
          /* Output newly nonzero coefficient */
-         (*block)[pos] = (JCOEF) s;
+         (*block)[pos] = (JCOEF) (s << Al);
          /* Remember its position in case we have to suspend */
          newnz_pos[num_newnz++] = pos;
        }
@@ -601,12 +722,8 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
        if (*thiscoef != 0) {
          CHECK_BIT_BUFFER(br_state, 1, goto undoit);
          if (GET_BITS(1)) {
-           if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
-             if (*thiscoef >= 0)
-               *thiscoef += p1;
-             else
-               *thiscoef += m1;
-           }
+           if ((*thiscoef & pm1[0]) == 0)  /* do nothing if already set it */
+             *thiscoef += pm1[(*thiscoef < 0)];
          }
        }
       }
diff --git a/jdsammmx.asm b/jdsammmx.asm
new file mode 100644 (file)
index 0000000..bb17d37
--- /dev/null
@@ -0,0 +1,893 @@
+;
+; jdsammmx.asm - upsampling (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%ifdef JDSAMPLE_FANCY_MMX_SUPPORTED
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_fancy_upsample_mmx)
+
+EXTN(jconst_fancy_upsample_mmx):       ; word constants, each replicated 4 times (one MMWORD)
+
+PW_ONE         times 4 dw  1           ; h2v1: bias added to the left-neighbor term (even outputs)
+PW_TWO         times 4 dw  2           ; h2v1: bias added to the right-neighbor term (odd outputs)
+PW_THREE       times 4 dw  3           ; h2v1: weight multiplied into the center sample
+PW_SEVEN       times 4 dw  7           ; not referenced by h2v1; presumably an h2v2 bias - TODO confirm
+PW_EIGHT       times 4 dw  8           ; not referenced by h2v1; presumably an h2v2 bias - TODO confirm
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+;
+; The upsampling algorithm is linear interpolation between pixel centers,
+; also known as a "triangle filter".  This is a good compromise between
+; speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+; of the way between input pixel centers.
+;
+; GLOBAL(void)
+; jpeg_h2v1_fancy_upsample_mmx (j_decompress_ptr cinfo,
+;                               jpeg_component_info * compptr,
+;                               JSAMPARRAY input_data,
+;                               JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+       align   16
+       global  EXTN(jpeg_h2v1_fancy_upsample_mmx)
+
+EXTN(jpeg_h2v1_fancy_upsample_mmx):    ; 2:1 horizontal triangle-filter upsampling, 8 input samples per pass
+       push    ebp
+       mov     ebp,esp                 ; args are addressed through the cinfo(ebp)..output_data_ptr(ebp) macros
+       pushpic ebx                     ; macro from an include file; paired with poppic at .return
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_downsampled_width(eax)]  ; colctr
+       test    eax,eax
+       jz      near .return            ; zero-width component: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      near .return            ; no rows to process
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:                              ; one iteration per row; ecx=rowctr, eax=colctr
+       push    eax                     ; colctr
+       push    edi                     ; save output_data row-pointer cursor
+       push    esi                     ; save input_data row-pointer cursor
+
+       mov     esi, JSAMPROW [esi]     ; inptr
+       mov     edi, JSAMPROW [edi]     ; outptr
+
+       test    eax, SIZEOF_MMWORD-1
+       jz      short .skip             ; colctr already a multiple of SIZEOF_MMWORD
+       mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]        ; dl = last real sample of the row
+       mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample (NOTE(review): writes one sample past downsampled_width; assumes the row buffer is padded - confirm)
+.skip:
+       pxor    mm0,mm0                 ; mm0=(all 0's)
+       pcmpeqb mm7,mm7                 ; mm7=(all 1's)
+       psrlq   mm7,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mm7=mask selecting the lowest byte only
+       pand    mm7, MMWORD [esi+0*SIZEOF_MMWORD]       ; mm7=( 0 - - - - - - -): sample 0 stands in for the missing left neighbor
+
+       add     eax, byte SIZEOF_MMWORD-1
+       and     eax, byte -SIZEOF_MMWORD        ; round colctr up to a multiple of SIZEOF_MMWORD (NOTE(review): assumes both rows are padded to this width - confirm)
+       cmp     eax, byte SIZEOF_MMWORD
+       ja      short .columnloop       ; more than one MMWORD left: right neighbor comes from the next MMWORD
+       alignx  16,7
+
+.columnloop_last:                      ; final MMWORD of the row: no next MMWORD to read
+       pcmpeqb mm6,mm6                 ; mm6=(all 1's)
+       psllq   mm6,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mm6=mask selecting the highest byte only
+       pand    mm6, MMWORD [esi+0*SIZEOF_MMWORD]       ; mm6=( - - - - - - - 7): sample 7 stands in for the missing right neighbor
+       jmp     short .upsample
+       alignx  16,7
+
+.columnloop:
+       movq    mm6, MMWORD [esi+1*SIZEOF_MMWORD]       ; load the next MMWORD of input
+       psllq   mm6,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mm6=( - - - - - - - 8): its first sample moved to the top byte
+
+.upsample:
+       movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]
+       movq    mm2,mm1
+       movq    mm3,mm1                 ; mm1=( 0 1 2 3 4 5 6 7)
+       psllq   mm2,BYTE_BIT            ; mm2=( - 0 1 2 3 4 5 6)
+       psrlq   mm3,BYTE_BIT            ; mm3=( 1 2 3 4 5 6 7 -)
+
+       por     mm2,mm7                 ; mm2=(-1 0 1 2 3 4 5 6)
+       por     mm3,mm6                 ; mm3=( 1 2 3 4 5 6 7 8)
+
+       movq    mm7,mm1
+       psrlq   mm7,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mm7=( 7 - - - - - - -)
+
+       movq      mm4,mm1               ; widen bytes to words by interleaving with zero (mm0)
+       punpcklbw mm1,mm0               ; mm1=( 0 1 2 3)
+       punpckhbw mm4,mm0               ; mm4=( 4 5 6 7)
+       movq      mm5,mm2
+       punpcklbw mm2,mm0               ; mm2=(-1 0 1 2)
+       punpckhbw mm5,mm0               ; mm5=( 3 4 5 6)
+       movq      mm6,mm3
+       punpcklbw mm3,mm0               ; mm3=( 1 2 3 4)
+       punpckhbw mm6,mm0               ; mm6=( 5 6 7 8)
+
+       pmullw  mm1,[GOTOFF(ebx,PW_THREE)]      ; mm1=3*center (low half)
+       pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; mm4=3*center (high half)
+       paddw   mm2,[GOTOFF(ebx,PW_ONE)]        ; mm2=left neighbor + 1 (low half)
+       paddw   mm5,[GOTOFF(ebx,PW_ONE)]        ; mm5=left neighbor + 1 (high half)
+       paddw   mm3,[GOTOFF(ebx,PW_TWO)]        ; mm3=right neighbor + 2 (low half)
+       paddw   mm6,[GOTOFF(ebx,PW_TWO)]        ; mm6=right neighbor + 2 (high half)
+
+       paddw   mm2,mm1                 ; 3*center + left + 1
+       paddw   mm5,mm4
+       psrlw   mm2,2                   ; mm2=OutLE=( 0  2  4  6)
+       psrlw   mm5,2                   ; mm5=OutHE=( 8 10 12 14)
+       paddw   mm3,mm1                 ; 3*center + right + 2
+       paddw   mm6,mm4
+       psrlw   mm3,2                   ; mm3=OutLO=( 1  3  5  7)
+       psrlw   mm6,2                   ; mm6=OutHO=( 9 11 13 15)
+
+       psllw   mm3,BYTE_BIT            ; move odd outputs into the high byte of each word
+       psllw   mm6,BYTE_BIT
+       por     mm2,mm3                 ; mm2=OutL=( 0  1  2  3  4  5  6  7)
+       por     mm5,mm6                 ; mm5=OutH=( 8  9 10 11 12 13 14 15)
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mm2       ; store 2 output MMWORDs per input MMWORD
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mm5
+
+       sub     eax, byte SIZEOF_MMWORD ; one MMWORD of input columns consumed
+       add     esi, byte 1*SIZEOF_MMWORD       ; inptr
+       add     edi, byte 2*SIZEOF_MMWORD       ; outptr
+       cmp     eax, byte SIZEOF_MMWORD
+       ja      near .columnloop        ; more than one MMWORD still to go
+       test    eax,eax
+       jnz     near .columnloop_last   ; exactly one MMWORD left: take the edge path
+
+       pop     esi                     ; restore input_data row-pointer cursor
+       pop     edi                     ; restore output_data row-pointer cursor
+       pop     eax                     ; restore colctr
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_data
+       add     edi, byte SIZEOF_JSAMPROW       ; output_data
+       dec     ecx                             ; rowctr
+       jg      near .rowloop
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+; Again a triangle filter; see comments for h2v1 case, above.
+;
+; GLOBAL(void)
+; jpeg_h2v2_fancy_upsample_mmx (j_decompress_ptr cinfo,
+;                               jpeg_component_info * compptr,
+;                               JSAMPARRAY input_data,
+;                               JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         4
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h2v2_fancy_upsample_mmx)
+
+EXTN(jpeg_h2v2_fancy_upsample_mmx):
+       ; Fancy (triangle-filter) 2:1 horizontal, 2:1 vertical upsampling.
+       ; Each input row yields two output rows.  Processing is two-stage:
+       ;   1. vertical:   Int0 = 3*row[0] + row[-1],  Int1 = 3*row[0] + row[+1]
+       ;                  (16-bit sums, parked temporarily in the output rows)
+       ;   2. horizontal: even = (3*Int[c] + Int[c-1] + 8) >> 4
+       ;                  odd  = (3*Int[c] + Int[c+1] + 7) >> 4
+       ; The rounding terms come from PW_EIGHT / PW_SEVEN (presumably packed
+       ; words of 8 and 7; they are defined outside this chunk).
+       ;
+       ; Prologue: build a frame aligned to SIZEOF_MMWORD.  The unaligned
+       ; frame pointer is stored at [ebp] so the epilogue can recover it,
+       ; and WK_NUM mmwords of scratch space (wk) are reserved below ebp.
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4                     ; slot for the original frame pointer
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; [aligned ebp] = original ebp
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve wk[0..WK_NUM-1]
+       pushpic eax             ; make room for the GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       ; Arguments are addressed through the original (unaligned) frame
+       ; pointer, which is still held in eax at this point.
+       mov     edx,eax                         ; edx = original ebp
+       mov     eax, POINTER [compptr(edx)]
+       mov     eax, JDIMENSION [jcompinfo_downsampled_width(eax)]  ; colctr
+       test    eax,eax
+       jz      near .return                    ; nothing to do for zero width
+
+       mov     ecx, POINTER [cinfo(edx)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      near .return                    ; nothing to do for zero rows
+
+       mov     esi, JSAMPARRAY [input_data(edx)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(edx)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       ; One iteration consumes one input row (plus the rows above and
+       ; below it) and produces two output rows.
+       push    eax                                     ; colctr
+       push    ecx                                     ; rowctr
+       push    edi                                     ; output_data
+       push    esi                                     ; input_data
+
+       mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+       mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+
+       ; If colctr is not a multiple of SIZEOF_MMWORD, replicate the last
+       ; sample of each of the three input rows one position past the end,
+       ; so the right-hand neighbour of the final column is a copy of it.
+       ; NOTE(review): this stores one sample beyond colctr in the input
+       ; rows; presumably the row buffers are padded -- confirm.
+       test    eax, SIZEOF_MMWORD-1
+       jz      short .skip
+       push    edx                             ; dl is used as a scratch byte
+       mov     dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
+       mov     dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
+       mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+       pop     edx
+.skip:
+       ; -- process the first column block
+       ; Compute the vertical sums for block 0 and seed wk(0)/wk(1) with
+       ; the left-edge neighbour (column 0 itself).
+
+       movq    mm0, MMWORD [ebx+0*SIZEOF_MMWORD]       ; mm0=row[ 0][0]
+       movq    mm1, MMWORD [ecx+0*SIZEOF_MMWORD]       ; mm1=row[-1][0]
+       movq    mm2, MMWORD [esi+0*SIZEOF_MMWORD]       ; mm2=row[+1][0]
+
+       pushpic ebx                     ; save inptr0 while ebx holds the GOT
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       pxor      mm3,mm3               ; mm3=(all 0's)
+       movq      mm4,mm0
+       punpcklbw mm0,mm3               ; mm0=row[ 0][0]( 0 1 2 3)
+       punpckhbw mm4,mm3               ; mm4=row[ 0][0]( 4 5 6 7)
+       movq      mm5,mm1
+       punpcklbw mm1,mm3               ; mm1=row[-1][0]( 0 1 2 3)
+       punpckhbw mm5,mm3               ; mm5=row[-1][0]( 4 5 6 7)
+       movq      mm6,mm2
+       punpcklbw mm2,mm3               ; mm2=row[+1][0]( 0 1 2 3)
+       punpckhbw mm6,mm3               ; mm6=row[+1][0]( 4 5 6 7)
+
+       pmullw  mm0,[GOTOFF(ebx,PW_THREE)]      ; 3 * row[0], low half
+       pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; 3 * row[0], high half
+
+       pcmpeqb mm7,mm7                         ; mm7=(all 1's)
+       psrlq   mm7,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm7=mask keeping the lowest word
+
+       paddw   mm1,mm0                 ; mm1=Int0L=( 0 1 2 3)
+       paddw   mm5,mm4                 ; mm5=Int0H=( 4 5 6 7)
+       paddw   mm2,mm0                 ; mm2=Int1L=( 0 1 2 3)
+       paddw   mm6,mm4                 ; mm6=Int1H=( 4 5 6 7)
+
+       movq    MMWORD [edx+0*SIZEOF_MMWORD], mm1       ; temporarily save
+       movq    MMWORD [edx+1*SIZEOF_MMWORD], mm5       ; the intermediate data
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mm2
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mm6
+
+       pand    mm1,mm7                 ; mm1=( 0 - - -)
+       pand    mm2,mm7                 ; mm2=( 0 - - -)
+
+       movq    MMWORD [wk(0)], mm1     ; wk(0)=left neighbour, upper row
+       movq    MMWORD [wk(1)], mm2     ; wk(1)=left neighbour, lower row
+
+       poppic  ebx                     ; restore inptr0
+
+       ; Round colctr up to a whole number of column blocks.  With more
+       ; than one block left, go to the general path; otherwise fall
+       ; through to the last-block path.
+       add     eax, byte SIZEOF_MMWORD-1
+       and     eax, byte -SIZEOF_MMWORD
+       cmp     eax, byte SIZEOF_MMWORD
+       ja      short .columnloop
+       alignx  16,7
+
+.columnloop_last:
+       ; -- process the last column block
+       ; There is no following block, so the right-hand neighbour is
+       ; column 7 of the current block (taken from the saved Int*H).
+
+       pushpic ebx                     ; save inptr0 (popped after .upsample)
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       pcmpeqb mm1,mm1                         ; mm1=(all 1's)
+       psllq   mm1,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm1=mask keeping the highest word
+       movq    mm2,mm1
+
+       pand    mm1, MMWORD [edx+1*SIZEOF_MMWORD]       ; mm1=( - - - 7)
+       pand    mm2, MMWORD [edi+1*SIZEOF_MMWORD]       ; mm2=( - - - 7)
+
+       movq    MMWORD [wk(2)], mm1     ; wk(2)=right neighbour, upper row
+       movq    MMWORD [wk(3)], mm2     ; wk(3)=right neighbour, lower row
+
+       jmp     short .upsample
+       alignx  16,7
+
+.columnloop:
+       ; -- process the next column block
+       ; Compute the vertical sums for the FOLLOWING block (stored two
+       ; mmwords ahead in the output rows) and take its column 0 as the
+       ; right-hand neighbour of the current block.
+
+       movq    mm0, MMWORD [ebx+1*SIZEOF_MMWORD]       ; mm0=row[ 0][1]
+       movq    mm1, MMWORD [ecx+1*SIZEOF_MMWORD]       ; mm1=row[-1][1]
+       movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]       ; mm2=row[+1][1]
+
+       pushpic ebx                     ; save inptr0 (popped after .upsample)
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       pxor      mm3,mm3               ; mm3=(all 0's)
+       movq      mm4,mm0
+       punpcklbw mm0,mm3               ; mm0=row[ 0][1]( 0 1 2 3)
+       punpckhbw mm4,mm3               ; mm4=row[ 0][1]( 4 5 6 7)
+       movq      mm5,mm1
+       punpcklbw mm1,mm3               ; mm1=row[-1][1]( 0 1 2 3)
+       punpckhbw mm5,mm3               ; mm5=row[-1][1]( 4 5 6 7)
+       movq      mm6,mm2
+       punpcklbw mm2,mm3               ; mm2=row[+1][1]( 0 1 2 3)
+       punpckhbw mm6,mm3               ; mm6=row[+1][1]( 4 5 6 7)
+
+       pmullw  mm0,[GOTOFF(ebx,PW_THREE)]      ; 3 * row[0], low half
+       pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; 3 * row[0], high half
+
+       paddw   mm1,mm0                 ; mm1=Int0L=( 0 1 2 3)
+       paddw   mm5,mm4                 ; mm5=Int0H=( 4 5 6 7)
+       paddw   mm2,mm0                 ; mm2=Int1L=( 0 1 2 3)
+       paddw   mm6,mm4                 ; mm6=Int1H=( 4 5 6 7)
+
+       movq    MMWORD [edx+2*SIZEOF_MMWORD], mm1       ; temporarily save
+       movq    MMWORD [edx+3*SIZEOF_MMWORD], mm5       ; the intermediate data
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+       movq    MMWORD [edi+3*SIZEOF_MMWORD], mm6
+
+       psllq   mm1,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm1=( - - - 0)
+       psllq   mm2,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm2=( - - - 0)
+
+       movq    MMWORD [wk(2)], mm1     ; wk(2)=right neighbour, upper row
+       movq    MMWORD [wk(3)], mm2     ; wk(3)=right neighbour, lower row
+
+.upsample:
+       ; -- process the upper row
+       ; Horizontal pass over Int0: build the left-shifted and right-shifted
+       ; neighbour vectors, then form the even and odd output samples.
+
+       movq    mm7, MMWORD [edx+0*SIZEOF_MMWORD]       ; mm7=Int0L=( 0 1 2 3)
+       movq    mm3, MMWORD [edx+1*SIZEOF_MMWORD]       ; mm3=Int0H=( 4 5 6 7)
+
+       movq    mm0,mm7
+       movq    mm4,mm3
+       psrlq   mm0,2*BYTE_BIT                  ; mm0=( 1 2 3 -)
+       psllq   mm4,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm4=( - - - 4)
+       movq    mm5,mm7
+       movq    mm6,mm3
+       psrlq   mm5,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm5=( 3 - - -)
+       psllq   mm6,2*BYTE_BIT                  ; mm6=( - 4 5 6)
+
+       por     mm0,mm4                         ; mm0=( 1 2 3 4)
+       por     mm5,mm6                         ; mm5=( 3 4 5 6)
+
+       movq    mm1,mm7
+       movq    mm2,mm3
+       psllq   mm1,2*BYTE_BIT                  ; mm1=( - 0 1 2)
+       psrlq   mm2,2*BYTE_BIT                  ; mm2=( 5 6 7 -)
+       movq    mm4,mm3
+       psrlq   mm4,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm4=( 7 - - -)
+
+       por     mm1, MMWORD [wk(0)]             ; mm1=(-1 0 1 2)
+       por     mm2, MMWORD [wk(2)]             ; mm2=( 5 6 7 8)
+
+       movq    MMWORD [wk(0)], mm4             ; column 7 becomes the next block's left neighbour
+
+       pmullw  mm7,[GOTOFF(ebx,PW_THREE)]      ; 3 * Int0L
+       pmullw  mm3,[GOTOFF(ebx,PW_THREE)]      ; 3 * Int0H
+       paddw   mm1,[GOTOFF(ebx,PW_EIGHT)]      ; left neighbours + rounding (even outputs)
+       paddw   mm5,[GOTOFF(ebx,PW_EIGHT)]
+       paddw   mm0,[GOTOFF(ebx,PW_SEVEN)]      ; right neighbours + rounding (odd outputs)
+       paddw   mm2,[GOTOFF(ebx,PW_SEVEN)]
+
+       paddw   mm1,mm7
+       paddw   mm5,mm3
+       psrlw   mm1,4                   ; mm1=Out0LE=( 0  2  4  6)
+       psrlw   mm5,4                   ; mm5=Out0HE=( 8 10 12 14)
+       paddw   mm0,mm7
+       paddw   mm2,mm3
+       psrlw   mm0,4                   ; mm0=Out0LO=( 1  3  5  7)
+       psrlw   mm2,4                   ; mm2=Out0HO=( 9 11 13 15)
+
+       ; Interleave: odd samples go to the high byte of each word, even
+       ; samples stay in the low byte.
+       psllw   mm0,BYTE_BIT
+       psllw   mm2,BYTE_BIT
+       por     mm1,mm0                 ; mm1=Out0L=( 0  1  2  3  4  5  6  7)
+       por     mm5,mm2                 ; mm5=Out0H=( 8  9 10 11 12 13 14 15)
+
+       ; The final samples overwrite the intermediate data of this block.
+       movq    MMWORD [edx+0*SIZEOF_MMWORD], mm1
+       movq    MMWORD [edx+1*SIZEOF_MMWORD], mm5
+
+       ; -- process the lower row
+       ; Same horizontal pass over Int1, using wk(1)/wk(3) as neighbours.
+
+       movq    mm6, MMWORD [edi+0*SIZEOF_MMWORD]       ; mm6=Int1L=( 0 1 2 3)
+       movq    mm4, MMWORD [edi+1*SIZEOF_MMWORD]       ; mm4=Int1H=( 4 5 6 7)
+
+       movq    mm7,mm6
+       movq    mm3,mm4
+       psrlq   mm7,2*BYTE_BIT                  ; mm7=( 1 2 3 -)
+       psllq   mm3,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm3=( - - - 4)
+       movq    mm0,mm6
+       movq    mm2,mm4
+       psrlq   mm0,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm0=( 3 - - -)
+       psllq   mm2,2*BYTE_BIT                  ; mm2=( - 4 5 6)
+
+       por     mm7,mm3                         ; mm7=( 1 2 3 4)
+       por     mm0,mm2                         ; mm0=( 3 4 5 6)
+
+       movq    mm1,mm6
+       movq    mm5,mm4
+       psllq   mm1,2*BYTE_BIT                  ; mm1=( - 0 1 2)
+       psrlq   mm5,2*BYTE_BIT                  ; mm5=( 5 6 7 -)
+       movq    mm3,mm4
+       psrlq   mm3,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm3=( 7 - - -)
+
+       por     mm1, MMWORD [wk(1)]             ; mm1=(-1 0 1 2)
+       por     mm5, MMWORD [wk(3)]             ; mm5=( 5 6 7 8)
+
+       movq    MMWORD [wk(1)], mm3             ; column 7 becomes the next block's left neighbour
+
+       pmullw  mm6,[GOTOFF(ebx,PW_THREE)]      ; 3 * Int1L
+       pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; 3 * Int1H
+       paddw   mm1,[GOTOFF(ebx,PW_EIGHT)]      ; left neighbours + rounding (even outputs)
+       paddw   mm0,[GOTOFF(ebx,PW_EIGHT)]
+       paddw   mm7,[GOTOFF(ebx,PW_SEVEN)]      ; right neighbours + rounding (odd outputs)
+       paddw   mm5,[GOTOFF(ebx,PW_SEVEN)]
+
+       paddw   mm1,mm6
+       paddw   mm0,mm4
+       psrlw   mm1,4                   ; mm1=Out1LE=( 0  2  4  6)
+       psrlw   mm0,4                   ; mm0=Out1HE=( 8 10 12 14)
+       paddw   mm7,mm6
+       paddw   mm5,mm4
+       psrlw   mm7,4                   ; mm7=Out1LO=( 1  3  5  7)
+       psrlw   mm5,4                   ; mm5=Out1HO=( 9 11 13 15)
+
+       psllw   mm7,BYTE_BIT
+       psllw   mm5,BYTE_BIT
+       por     mm1,mm7                 ; mm1=Out1L=( 0  1  2  3  4  5  6  7)
+       por     mm0,mm5                 ; mm0=Out1H=( 8  9 10 11 12 13 14 15)
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mm1
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mm0
+
+       poppic  ebx                     ; restore inptr0
+
+       ; Advance one input block (two output blocks) and pick the path for
+       ; the next block: general, last, or row finished.
+       sub     eax, byte SIZEOF_MMWORD
+       add     ecx, byte 1*SIZEOF_MMWORD       ; inptr1(above)
+       add     ebx, byte 1*SIZEOF_MMWORD       ; inptr0
+       add     esi, byte 1*SIZEOF_MMWORD       ; inptr1(below)
+       add     edx, byte 2*SIZEOF_MMWORD       ; outptr0
+       add     edi, byte 2*SIZEOF_MMWORD       ; outptr1
+       cmp     eax, byte SIZEOF_MMWORD
+       ja      near .columnloop                ; more than one block left
+       test    eax,eax
+       jnz     near .columnloop_last           ; exactly one block left
+
+       ; Row finished: restore the row-level state saved at .rowloop.
+       pop     esi
+       pop     edi
+       pop     ecx
+       pop     eax
+
+       add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+       sub     ecx, byte 2                     ; rowctr
+       jg      near .rowloop                   ; loop while rowctr > 0
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%ifdef UPSAMPLE_H1V2_SUPPORTED
+
+; --------------------------------------------------------------------------
+;
+; Fancy processing for the common case of 1:1 horizontal and 2:1 vertical.
+; Again a triangle filter; see comments for h2v1 case, above.
+;
+; GLOBAL(void)
+; jpeg_h1v2_fancy_upsample_mmx (j_decompress_ptr cinfo,
+;                               jpeg_component_info * compptr,
+;                               JSAMPARRAY input_data,
+;                               JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+%define gotptr         ebp-SIZEOF_POINTER      ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h1v2_fancy_upsample_mmx)
+
+EXTN(jpeg_h1v2_fancy_upsample_mmx):
+       ; Fancy (triangle-filter) 1:1 horizontal, 2:1 vertical upsampling.
+       ; For every input row two output rows are produced, column by column:
+       ;   out0 = (3*row[0] + row[-1] + 1) >> 2
+       ;   out1 = (3*row[0] + row[+1] + 2) >> 2
+       ; The rounding terms come from PW_ONE / PW_TWO (presumably packed
+       ; words of 1 and 2; they are defined outside this chunk).
+       push    ebp
+       mov     ebp,esp
+       pushpic eax             ; make room for the GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       ; colctr = downsampled_width rounded up to a multiple of
+       ; SIZEOF_MMWORD; the column loop always handles whole mmwords.
+       ; NOTE(review): the loop may therefore read and write past
+       ; downsampled_width; presumably the row buffers are padded -- confirm.
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_downsampled_width(eax)]  ; colctr
+       add     eax, byte SIZEOF_MMWORD-1
+       and     eax, byte -SIZEOF_MMWORD
+       jz      near .return                    ; zero width: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      near .return                    ; zero rows: nothing to do
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       ; One iteration consumes one input row (plus the rows above and
+       ; below it) and produces two output rows.
+       push    eax                                     ; colctr
+       push    ecx                                     ; rowctr
+       push    edi                                     ; output_data
+       push    esi                                     ; input_data
+
+       mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+       mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+
+       pxor    mm0,mm0                 ; mm0=(all 0's)
+       alignx  16,7
+
+.columnloop:
+       ; Process SIZEOF_MMWORD samples of the three input rows at once.
+       movq    mm1, MMWORD [ebx]       ; mm1=row[ 0]( 0 1 2 3 4 5 6 7)
+       movq    mm2, MMWORD [ecx]       ; mm2=row[-1]( 0 1 2 3 4 5 6 7)
+       movq    mm3, MMWORD [esi]       ; mm3=row[+1]( 0 1 2 3 4 5 6 7)
+
+       pushpic ebx                     ; save inptr0 while ebx holds the GOT
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       ; Widen bytes to words so the weighted sums cannot overflow a byte.
+       movq      mm4,mm1
+       punpcklbw mm1,mm0               ; mm1=row[ 0]( 0 1 2 3)
+       punpckhbw mm4,mm0               ; mm4=row[ 0]( 4 5 6 7)
+       movq      mm5,mm2
+       punpcklbw mm2,mm0               ; mm2=row[-1]( 0 1 2 3)
+       punpckhbw mm5,mm0               ; mm5=row[-1]( 4 5 6 7)
+       movq      mm6,mm3
+       punpcklbw mm3,mm0               ; mm3=row[+1]( 0 1 2 3)
+       punpckhbw mm6,mm0               ; mm6=row[+1]( 4 5 6 7)
+
+       pmullw  mm1,[GOTOFF(ebx,PW_THREE)]      ; 3 * row[0], low half
+       pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; 3 * row[0], high half
+       paddw   mm2,[GOTOFF(ebx,PW_ONE)]        ; row[-1] + rounding for out0
+       paddw   mm5,[GOTOFF(ebx,PW_ONE)]
+       paddw   mm3,[GOTOFF(ebx,PW_TWO)]        ; row[+1] + rounding for out1
+       paddw   mm6,[GOTOFF(ebx,PW_TWO)]
+
+       paddw   mm2,mm1
+       paddw   mm5,mm4
+       psrlw   mm2,2                   ; mm2=Out0L=( 0 1 2 3)
+       psrlw   mm5,2                   ; mm5=Out0H=( 4 5 6 7)
+       paddw   mm3,mm1
+       paddw   mm6,mm4
+       psrlw   mm3,2                   ; mm3=Out1L=( 0 1 2 3)
+       psrlw   mm6,2                   ; mm6=Out1H=( 4 5 6 7)
+
+       ; Narrow the word results back to bytes.
+       packuswb  mm2,mm5               ; mm2=Out0=( 0 1 2 3 4 5 6 7)
+       packuswb  mm3,mm6               ; mm3=Out1=( 0 1 2 3 4 5 6 7)
+
+       movq    MMWORD [edx], mm2
+       movq    MMWORD [edi], mm3
+
+       poppic  ebx                     ; restore inptr0
+
+       add     ecx, byte 1*SIZEOF_MMWORD       ; inptr1(above)
+       add     ebx, byte 1*SIZEOF_MMWORD       ; inptr0
+       add     esi, byte 1*SIZEOF_MMWORD       ; inptr1(below)
+       add     edx, byte 1*SIZEOF_MMWORD       ; outptr0
+       add     edi, byte 1*SIZEOF_MMWORD       ; outptr1
+       sub     eax, byte SIZEOF_MMWORD
+       jnz     near .columnloop                ; until the rounded colctr reaches 0
+
+       ; Row finished: restore the row-level state saved at .rowloop.
+       pop     esi
+       pop     edi
+       pop     ecx
+       pop     eax
+
+       add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+       sub     ecx, byte 2                     ; rowctr
+       jg      near .rowloop                   ; loop while rowctr > 0
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       poppic  eax             ; remove gotptr
+       pop     ebp
+       ret
+
+%endif ; UPSAMPLE_H1V2_SUPPORTED
+%endif ; JDSAMPLE_FANCY_MMX_SUPPORTED
+
+%ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED
+
+%ifndef JDSAMPLE_FANCY_MMX_SUPPORTED
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+%endif
+;
+; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jpeg_h2v1_upsample_mmx (j_decompress_ptr cinfo,
+;                         jpeg_component_info * compptr,
+;                         JSAMPARRAY input_data,
+;                         JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+       align   16
+       global  EXTN(jpeg_h2v1_upsample_mmx)
+
+EXTN(jpeg_h2v1_upsample_mmx):
+       ; Box-filter 2:1 horizontal, 1:1 vertical upsampling: every input
+       ; sample is written twice in a row.  Each input row produces one
+       ; output row.
+       push    ebp
+       mov     ebp,esp
+;      push    ebx             ; unused
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; edx = output_width rounded up to a multiple of 2*SIZEOF_MMWORD;
+       ; the column loop always writes whole pairs of mmwords.
+       ; NOTE(review): the loop may therefore write past output_width;
+       ; presumably the output rows are padded accordingly -- confirm.
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jdstruct_output_width(edx)]
+       add     edx, byte (2*SIZEOF_MMWORD)-1
+       and     edx, byte -(2*SIZEOF_MMWORD)
+       jz      short .return                   ; zero width: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      short .return                   ; zero rows: nothing to do
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       push    edi                             ; output_data
+       push    esi                             ; input_data
+
+       mov     esi, JSAMPROW [esi]             ; inptr
+       mov     edi, JSAMPROW [edi]             ; outptr
+       mov     eax,edx                         ; colctr
+       alignx  16,7
+.columnloop:
+       ; The loop body is unrolled twice; each half expands one input
+       ; mmword into two output mmwords and may end the row.
+
+       movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]
+
+       movq      mm1,mm0
+       punpcklbw mm0,mm0               ; duplicate each of the low four samples
+       punpckhbw mm1,mm1               ; duplicate each of the high four samples
+
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mm1
+
+       sub     eax, byte 2*SIZEOF_MMWORD
+       jz      short .nextrow
+
+       movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]
+
+       movq      mm3,mm2
+       punpcklbw mm2,mm2               ; duplicate each of the low four samples
+       punpckhbw mm3,mm3               ; duplicate each of the high four samples
+
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+       movq    MMWORD [edi+3*SIZEOF_MMWORD], mm3
+
+       sub     eax, byte 2*SIZEOF_MMWORD
+       jz      short .nextrow
+
+       add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+       add     edi, byte 4*SIZEOF_MMWORD       ; outptr
+       jmp     short .columnloop
+       alignx  16,7
+
+.nextrow:
+       pop     esi
+       pop     edi
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_data
+       add     edi, byte SIZEOF_JSAMPROW       ; output_data
+       dec     ecx                             ; rowctr
+       jg      short .rowloop                  ; loop while rowctr > 0
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jpeg_h2v2_upsample_mmx (j_decompress_ptr cinfo,
+;                         jpeg_component_info * compptr,
+;                         JSAMPARRAY input_data,
+;                         JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+       align   16
+       global  EXTN(jpeg_h2v2_upsample_mmx)
+
+EXTN(jpeg_h2v2_upsample_mmx):
+       ; Box-filter 2:1 horizontal, 2:1 vertical upsampling: every input
+       ; sample is written twice in a row, and the expanded row is stored
+       ; into two consecutive output rows.
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; edx = output_width rounded up to a multiple of 2*SIZEOF_MMWORD;
+       ; the column loop always writes whole pairs of mmwords.
+       ; NOTE(review): the loop may therefore write past output_width;
+       ; presumably the output rows are padded accordingly -- confirm.
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jdstruct_output_width(edx)]
+       add     edx, byte (2*SIZEOF_MMWORD)-1
+       and     edx, byte -(2*SIZEOF_MMWORD)
+       jz      near .return                    ; zero width: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      short .return                   ; zero rows: nothing to do
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       push    edi                                     ; output_data
+       push    esi                                     ; input_data
+
+       mov     esi, JSAMPROW [esi]                     ; inptr
+       mov     ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+       mov     eax,edx                                 ; colctr
+       alignx  16,7
+.columnloop:
+       ; The loop body is unrolled twice; each half expands one input
+       ; mmword into two output mmwords, stores them to both output rows,
+       ; and may end the row.
+
+       movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]
+
+       movq      mm1,mm0
+       punpcklbw mm0,mm0               ; duplicate each of the low four samples
+       punpckhbw mm1,mm1               ; duplicate each of the high four samples
+
+       movq    MMWORD [ebx+0*SIZEOF_MMWORD], mm0
+       movq    MMWORD [ebx+1*SIZEOF_MMWORD], mm1
+       movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+       movq    MMWORD [edi+1*SIZEOF_MMWORD], mm1
+
+       sub     eax, byte 2*SIZEOF_MMWORD
+       jz      short .nextrow
+
+       movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]
+
+       movq      mm3,mm2
+       punpcklbw mm2,mm2               ; duplicate each of the low four samples
+       punpckhbw mm3,mm3               ; duplicate each of the high four samples
+
+       movq    MMWORD [ebx+2*SIZEOF_MMWORD], mm2
+       movq    MMWORD [ebx+3*SIZEOF_MMWORD], mm3
+       movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+       movq    MMWORD [edi+3*SIZEOF_MMWORD], mm3
+
+       sub     eax, byte 2*SIZEOF_MMWORD
+       jz      short .nextrow
+
+       add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+       add     ebx, byte 4*SIZEOF_MMWORD       ; outptr0
+       add     edi, byte 4*SIZEOF_MMWORD       ; outptr1
+       jmp     short .columnloop
+       alignx  16,7
+
+.nextrow:
+       pop     esi
+       pop     edi
+
+       add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+       sub     ecx, byte 2                     ; rowctr
+       jg      short .rowloop                  ; loop while rowctr > 0
+
+       emms            ; empty MMX state
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; JDSAMPLE_SIMPLE_MMX_SUPPORTED
index 80ffefb2a1ccf5ddc1530b921df0eef6e3e45c18..37a6cee2ed2d584e9c252e8685f765fdaba2ad67 100644 (file)
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 5, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file contains upsampling routines.
  *
  * Upsampling input data is counted in "row groups".  A row group
@@ -21,6 +28,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jcolsamp.h"          /* Private declarations */
 
 
 /* Pointer to routine to upsample a single component */
@@ -285,6 +293,37 @@ h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 }
 
 
+#ifdef UPSAMPLE_H1V2_SUPPORTED
+
+/*
+ * Fast processing for the common case of 1:1 horizontal and 2:1 vertical.
+ * It's still a box filter.
+ *
+ * SIMD Ext: This routine is for files that are rotated or transposed
+ *           by jpegtran.
+ */
+
+METHODDEF(void)
+h1v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+              JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  /* Box filter, 1:1 horizontal and 2:1 vertical: every input row is
+   * copied verbatim into two consecutive output rows.  compptr is not
+   * referenced; it is part of the common upsample-method signature.
+   */
+  JSAMPARRAY dest_rows = *output_data_ptr;
+  int src, dst;
+
+  for (src = 0, dst = 0; dst < cinfo->max_v_samp_factor; src++, dst += 2) {
+    /* upper copy of the pair */
+    jcopy_sample_rows(input_data, src, dest_rows, dst,
+                     1, cinfo->output_width);
+    /* lower copy of the pair */
+    jcopy_sample_rows(input_data, src, dest_rows, dst + 1,
+                     1, cinfo->output_width);
+  }
+}
+
+#endif /* UPSAMPLE_H1V2_SUPPORTED */
+
+
 /*
  * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
  *
@@ -391,6 +430,52 @@ h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 }
 
 
+#ifdef UPSAMPLE_H1V2_SUPPORTED
+
+/*
+ * Fancy processing for the common case of 1:1 horizontal and 2:1 vertical.
+ * Again a triangle filter; see comments for h2v1 case, above.
+ *
+ * It is OK for us to reference the adjacent input rows because we demanded
+ * context from the main buffer controller (see initialization code).
+ *
+ * SIMD Ext: This routine is for files that are rotated or transposed
+ *           by jpegtran.
+ */
+
+METHODDEF(void)
+h1v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                    JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  /* Triangle filter, 1:1 horizontal and 2:1 vertical.  Each input row
+   * produces two output rows: the first blends it 3:1 with the row above,
+   * the second blends it 3:1 with the row below.  The rounding term is 1
+   * for the upper output row and 2 for the lower one.
+   */
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW near_row, far_row, dst;
+  register int weighted;
+  register JDIMENSION remaining;
+  int inrow, outrow, pass;
+
+  outrow = 0;
+  for (inrow = 0; outrow < cinfo->max_v_samp_factor; inrow++) {
+    for (pass = 0; pass < 2; pass++) {
+      /* near_row is the nearest input row; far_row is the next nearest:
+       * the row above on pass 0, the row below on pass 1.
+       */
+      near_row = input_data[inrow];
+      far_row = input_data[(pass == 0) ? inrow - 1 : inrow + 1];
+      dst = output_data[outrow++];
+
+      remaining = compptr->downsampled_width;
+      while (remaining > 0) {
+       weighted = GETJSAMPLE(*near_row++) * 3 + GETJSAMPLE(*far_row++);
+       *dst++ = (JSAMPLE) ((weighted + pass + 1) >> 2);
+       remaining--;
+      }
+    }
+  }
+}
+
+#endif /* UPSAMPLE_H1V2_SUPPORTED */
+
+
 /*
  * Module initialization routine for upsampling.
  */
@@ -403,6 +488,7 @@ jinit_upsampler (j_decompress_ptr cinfo)
   jpeg_component_info * compptr;
   boolean need_buffer, do_fancy;
   int h_in_group, v_in_group, h_out_group, v_out_group;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
 
   upsample = (my_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -447,18 +533,83 @@ jinit_upsampler (j_decompress_ptr cinfo)
     } else if (h_in_group * 2 == h_out_group &&
               v_in_group == v_out_group) {
       /* Special cases for 2h1v upsampling */
-      if (do_fancy && compptr->downsampled_width > 2)
-       upsample->methods[ci] = h2v1_fancy_upsample;
-      else
-       upsample->methods[ci] = h2v1_upsample;
+      if (do_fancy && compptr->downsampled_width > 2) {
+#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE2 &&
+           IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2))
+         upsample->methods[ci] = jpeg_h2v1_fancy_upsample_sse2;
+       else
+#endif
+#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED
+       if (simd & JSIMD_MMX)
+         upsample->methods[ci] = jpeg_h2v1_fancy_upsample_mmx;
+       else
+#endif
+         upsample->methods[ci] = h2v1_fancy_upsample;
+      } else {
+#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE2)
+         upsample->methods[ci] = jpeg_h2v1_upsample_sse2;
+       else
+#endif
+#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED
+       if (simd & JSIMD_MMX)
+         upsample->methods[ci] = jpeg_h2v1_upsample_mmx;
+       else
+#endif
+         upsample->methods[ci] = h2v1_upsample;
+      }
     } else if (h_in_group * 2 == h_out_group &&
               v_in_group * 2 == v_out_group) {
       /* Special cases for 2h2v upsampling */
       if (do_fancy && compptr->downsampled_width > 2) {
-       upsample->methods[ci] = h2v2_fancy_upsample;
+#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE2 &&
+           IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2))
+         upsample->methods[ci] = jpeg_h2v2_fancy_upsample_sse2;
+       else
+#endif
+#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED
+       if (simd & JSIMD_MMX)
+         upsample->methods[ci] = jpeg_h2v2_fancy_upsample_mmx;
+       else
+#endif
+         upsample->methods[ci] = h2v2_fancy_upsample;
+       upsample->pub.need_context_rows = TRUE;
+      } else {
+#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE2)
+         upsample->methods[ci] = jpeg_h2v2_upsample_sse2;
+       else
+#endif
+#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED
+       if (simd & JSIMD_MMX)
+         upsample->methods[ci] = jpeg_h2v2_upsample_mmx;
+       else
+#endif
+         upsample->methods[ci] = h2v2_upsample;
+      }
+#ifdef UPSAMPLE_H1V2_SUPPORTED
+    } else if (h_in_group == h_out_group &&
+              v_in_group * 2 == v_out_group) {
+      /* Special cases for 1h2v upsampling */
+      if (do_fancy) {
+#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED
+       if (simd & JSIMD_SSE2 &&
+           IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2))
+         upsample->methods[ci] = jpeg_h1v2_fancy_upsample_sse2;
+       else
+#endif
+#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED
+       if (simd & JSIMD_MMX)
+         upsample->methods[ci] = jpeg_h1v2_fancy_upsample_mmx;
+       else
+#endif
+         upsample->methods[ci] = h1v2_fancy_upsample;
        upsample->pub.need_context_rows = TRUE;
       } else
-       upsample->methods[ci] = h2v2_upsample;
+       upsample->methods[ci] = h1v2_upsample;
+#endif /* UPSAMPLE_H1V2_SUPPORTED */
     } else if ((h_out_group % h_in_group) == 0 &&
               (v_out_group % v_in_group) == 0) {
       /* Generic integral-factors upsampling method */
@@ -468,11 +619,52 @@ jinit_upsampler (j_decompress_ptr cinfo)
     } else
       ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
     if (need_buffer) {
+      enum { SIZEOF_XMMWORD = 16 };    /* from jsimdext.inc */
       upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
        ((j_common_ptr) cinfo, JPOOL_IMAGE,
-        (JDIMENSION) jround_up((long) cinfo->output_width,
-                               (long) cinfo->max_h_samp_factor),
+        (JDIMENSION) jround_up(jround_up((long) cinfo->output_width,
+                                         (long) cinfo->max_h_samp_factor),
+                               (long) (2 * SIZEOF_XMMWORD)),
         (JDIMENSION) cinfo->max_v_samp_factor);
     }
   }
 }
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+/* Return the JSIMD_* value selected for upsampling; uses the same feature macros and flag tests as the 2h2v method selection in jinit_upsampler. */
+GLOBAL(unsigned int)
+jpeg_simd_upsampler (j_decompress_ptr cinfo, int do_fancy)
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);  /* bitmask tested against JSIMD_* below */
+/* When merged upsampling is compiled in, every non-fancy query is answered by the merged-upsampler routine. */
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  if (!do_fancy)
+    return jpeg_simd_merged_upsampler(cinfo);
+#endif
+/* SSE2 is preferred over MMX; each candidate exists only if its *_SUPPORTED macro is defined. */
+  if (do_fancy) {
+#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2 &&
+        IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2))  /* SSE2 path also requires the constant table to pass the alignment check */
+      return JSIMD_SSE2;
+#endif
+#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED
+    if (simd & JSIMD_MMX)
+      return JSIMD_MMX;
+#endif
+  } else {  /* reachable only when UPSAMPLE_MERGING_SUPPORTED is not defined */
+#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED
+    if (simd & JSIMD_SSE2)
+      return JSIMD_SSE2;
+#endif
+#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED
+    if (simd & JSIMD_MMX)
+      return JSIMD_MMX;
+#endif
+  }
+/* No compiled-in SIMD routine matched the flags in simd. */
+  return JSIMD_NONE;
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
diff --git a/jdsamss2.asm b/jdsamss2.asm
new file mode 100644 (file)
index 0000000..46fcf51
--- /dev/null
@@ -0,0 +1,883 @@
+;
+; jdsamss2.asm - upsampling (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_fancy_upsample_sse2)
+
+EXTN(jconst_fancy_upsample_sse2):      ; 8 x 16-bit word constants used by the fancy upsamplers
+; Each entry fills one xmmword (8 words of the same value).
+PW_ONE         times 8 dw  1           ; bias added before >>2 (h2v1 even outputs, h1v2 upper row)
+PW_TWO         times 8 dw  2           ; bias added before >>2 (h2v1 odd outputs, h1v2 lower row)
+PW_THREE       times 8 dw  3           ; pmullw weight applied to the current sample/row
+PW_SEVEN       times 8 dw  7           ; bias added before >>4 (h2v2 odd outputs)
+PW_EIGHT       times 8 dw  8           ; bias added before >>4 (h2v2 even outputs)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+;
+; The upsampling algorithm is linear interpolation between pixel centers,
+; also known as a "triangle filter".  This is a good compromise between
+; speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+; of the way between input pixel centers.
+;
+; GLOBAL(void)
+; jpeg_h2v1_fancy_upsample_sse2 (j_decompress_ptr cinfo,
+;                                jpeg_component_info * compptr,
+;                                JSAMPARRAY input_data,
+;                                JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+       align   16
+       global  EXTN(jpeg_h2v1_fancy_upsample_sse2)
+
+EXTN(jpeg_h2v1_fancy_upsample_sse2):   ; out[2i]=(3*in[i]+in[i-1]+1)>>2, out[2i+1]=(3*in[i]+in[i+1]+2)>>2
+       push    ebp
+       mov     ebp,esp
+       pushpic ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+; ebx holds the GOT address for the whole function (used by the GOTOFF constant references).
+       get_GOT ebx             ; get GOT address
+; eax = number of input columns, ecx = number of rows to produce.
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_downsampled_width(eax)]  ; colctr
+       test    eax,eax
+       jz      near .return            ; zero-width component: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      near .return            ; no rows requested
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       push    eax                     ; colctr
+       push    edi
+       push    esi
+
+       mov     esi, JSAMPROW [esi]     ; inptr
+       mov     edi, JSAMPROW [edi]     ; outptr
+; If the width is not a multiple of 16, replicate the last sample one position past the end of the row.
+       test    eax, SIZEOF_XMMWORD-1
+       jz      short .skip
+       mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+.skip:
+       pxor    xmm0,xmm0               ; xmm0=(all 0's)
+       pcmpeqb xmm7,xmm7               ; xmm7=(all 1's)
+       psrldq  xmm7,(SIZEOF_XMMWORD-1) ; keep only byte 0 of the mask
+       pand    xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; xmm7=( 0 -- ... --): sample 0 stands in for sample -1
+; Round colctr up to a whole number of 16-sample blocks.
+       add     eax, byte SIZEOF_XMMWORD-1
+       and     eax, byte -SIZEOF_XMMWORD
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .columnloop       ; more than one block: right neighbor comes from the next block
+       alignx  16,7
+; Last (or only) block: there is no next block, so sample 15 of this block is reused as sample 16.
+.columnloop_last:
+       pcmpeqb xmm6,xmm6               ; xmm6=(all 1's)
+       pslldq  xmm6,(SIZEOF_XMMWORD-1) ; keep only byte 15 of the mask
+       pand    xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; xmm6=(-- ... -- 15)
+       jmp     short .upsample
+       alignx  16,7
+; Interior block: move sample 0 of the next block into byte 15 to serve as sample 16.
+.columnloop:
+       movdqa  xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
+       pslldq  xmm6,(SIZEOF_XMMWORD-1)
+
+.upsample:
+       movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+       movdqa  xmm2,xmm1
+       movdqa  xmm3,xmm1               ; xmm1=( 0  1  2 ... 13 14 15)
+       pslldq  xmm2,1                  ; xmm2=(--  0  1 ... 12 13 14)
+       psrldq  xmm3,1                  ; xmm3=( 1  2  3 ... 14 15 --)
+
+       por     xmm2,xmm7               ; xmm2=(-1  0  1 ... 12 13 14)
+       por     xmm3,xmm6               ; xmm3=( 1  2  3 ... 14 15 16)
+; Carry sample 15 of this block forward as the left neighbor of the next block.
+       movdqa  xmm7,xmm1
+       psrldq  xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --)
+; Widen bytes to words by interleaving with zero.
+       movdqa    xmm4,xmm1
+       punpcklbw xmm1,xmm0             ; xmm1=( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm4,xmm0             ; xmm4=( 8  9 10 11 12 13 14 15)
+       movdqa    xmm5,xmm2
+       punpcklbw xmm2,xmm0             ; xmm2=(-1  0  1  2  3  4  5  6)
+       punpckhbw xmm5,xmm0             ; xmm5=( 7  8  9 10 11 12 13 14)
+       movdqa    xmm6,xmm3
+       punpcklbw xmm3,xmm0             ; xmm3=( 1  2  3  4  5  6  7  8)
+       punpckhbw xmm6,xmm0             ; xmm6=( 9 10 11 12 13 14 15 16)
+; current*3, left neighbor + 1, right neighbor + 2
+       pmullw  xmm1,[GOTOFF(ebx,PW_THREE)]
+       pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+       paddw   xmm2,[GOTOFF(ebx,PW_ONE)]
+       paddw   xmm5,[GOTOFF(ebx,PW_ONE)]
+       paddw   xmm3,[GOTOFF(ebx,PW_TWO)]
+       paddw   xmm6,[GOTOFF(ebx,PW_TWO)]
+
+       paddw   xmm2,xmm1
+       paddw   xmm5,xmm4
+       psrlw   xmm2,2                  ; xmm2=OutLE=( 0  2  4  6  8 10 12 14)
+       psrlw   xmm5,2                  ; xmm5=OutHE=(16 18 20 22 24 26 28 30)
+       paddw   xmm3,xmm1
+       paddw   xmm6,xmm4
+       psrlw   xmm3,2                  ; xmm3=OutLO=( 1  3  5  7  9 11 13 15)
+       psrlw   xmm6,2                  ; xmm6=OutHO=(17 19 21 23 25 27 29 31)
+; Interleave: odd outputs go to the high byte of each word, even outputs stay in the low byte.
+       psllw   xmm3,BYTE_BIT
+       psllw   xmm6,BYTE_BIT
+       por     xmm2,xmm3               ; xmm2=OutL=( 0  1  2 ... 13 14 15)
+       por     xmm5,xmm6               ; xmm5=OutH=(16 17 18 ... 29 30 31)
+
+       movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
+       movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5
+; 16 input samples consumed, 32 output samples produced.
+       sub     eax, byte SIZEOF_XMMWORD
+       add     esi, byte 1*SIZEOF_XMMWORD      ; inptr
+       add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      near .columnloop        ; at least two blocks remain
+       test    eax,eax
+       jnz     near .columnloop_last   ; exactly one block remains
+
+       pop     esi
+       pop     edi
+       pop     eax
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_data
+       add     edi, byte SIZEOF_JSAMPROW       ; output_data
+       dec     ecx                             ; rowctr
+       jg      near .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+; Again a triangle filter; see comments for h2v1 case, above.
+;
+; GLOBAL(void)
+; jpeg_h2v2_fancy_upsample_sse2 (j_decompress_ptr cinfo,
+;                                jpeg_component_info * compptr,
+;                                JSAMPARRAY input_data,
+;                                JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         4
+%define gotptr         wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h2v2_fancy_upsample_sse2)
+
+EXTN(jpeg_h2v2_fancy_upsample_sse2):   ; vertical 3:1 blend into 16-bit intermediates, then horizontal 3:1 blend and >>4
+       push    ebp
+       mov     eax,esp                         ; eax = unaligned frame pointer (address of the saved ebp)
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax                       ; keep the unaligned frame pointer at [aligned ebp]
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve xmmword wk[WK_NUM] below the aligned ebp
+       pushpic eax             ; make room for the GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+; ebx is reused as inptr0 inside the loop, so the GOT address is parked in the gotptr slot.
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+; Arguments are addressed through edx, because ebp now points at the aligned work area.
+       mov     edx,eax                         ; edx = unaligned frame pointer (argument base)
+       mov     eax, POINTER [compptr(edx)]
+       mov     eax, JDIMENSION [jcompinfo_downsampled_width(eax)]  ; colctr
+       test    eax,eax
+       jz      near .return            ; zero-width component: nothing to do
+
+       mov     ecx, POINTER [cinfo(edx)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      near .return            ; no rows requested
+
+       mov     esi, JSAMPARRAY [input_data(edx)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(edx)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       push    eax                                     ; colctr
+       push    ecx
+       push    edi
+       push    esi
+; One input row (plus the rows above and below it) produces two output rows.
+       mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+       mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+; If the width is not a multiple of 16, replicate the last sample of all three input rows one position past the end.
+       test    eax, SIZEOF_XMMWORD-1
+       jz      short .skip
+       push    edx                     ; dl is used as scratch below; edx holds outptr0
+       mov     dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
+       mov     dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
+       mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+       pop     edx
+.skip:
+       ; -- process the first column block
+
+       movdqa  xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD]    ; xmm0=row[ 0][0]
+       movdqa  xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD]    ; xmm1=row[-1][0]
+       movdqa  xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; xmm2=row[+1][0]
+; ebx (inptr0) is set aside while the register holds the GOT address.
+       pushpic ebx
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       pxor      xmm3,xmm3             ; xmm3=(all 0's)
+       movdqa    xmm4,xmm0
+       punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+       movdqa    xmm5,xmm1
+       punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+       movdqa    xmm6,xmm2
+       punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+; Vertical pass: Int0 = 3*row[0] + row[-1], Int1 = 3*row[0] + row[+1] (16-bit words).
+       pmullw  xmm0,[GOTOFF(ebx,PW_THREE)]
+       pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+
+       pcmpeqb xmm7,xmm7               ; xmm7=(all 1's)
+       psrldq  xmm7,(SIZEOF_XMMWORD-2) ; keep only word 0 of the mask
+
+       paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+       paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+       paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+       paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+; The output rows double as scratch space for the word-sized intermediates.
+       movdqa  XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1    ; temporarily save
+       movdqa  XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+       movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
+       movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6
+; At the left edge, column 0 stands in for column -1: wk(0)/wk(1) hold the left neighbor for the upper/lower row.
+       pand    xmm1,xmm7               ; xmm1=( 0 -- -- -- -- -- -- --)
+       pand    xmm2,xmm7               ; xmm2=( 0 -- -- -- -- -- -- --)
+
+       movdqa  XMMWORD [wk(0)], xmm1
+       movdqa  XMMWORD [wk(1)], xmm2
+
+       poppic  ebx
+; Round colctr up to a whole number of 16-sample blocks.
+       add     eax, byte SIZEOF_XMMWORD-1
+       and     eax, byte -SIZEOF_XMMWORD
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      short .columnloop       ; more than one block: right neighbor comes from the next block
+       alignx  16,7
+
+.columnloop_last:
+       ; -- process the last column block
+
+       pushpic ebx
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+; No next block: column 15 of this block is reused as column 16 (wk(2)/wk(3) = right neighbor, upper/lower row).
+       pcmpeqb xmm1,xmm1               ; xmm1=(all 1's)
+       pslldq  xmm1,(SIZEOF_XMMWORD-2) ; keep only word 7 of the mask
+       movdqa  xmm2,xmm1
+
+       pand    xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD]
+       pand    xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD]
+
+       movdqa  XMMWORD [wk(2)], xmm1   ; xmm1=(-- -- -- -- -- -- -- 15)
+       movdqa  XMMWORD [wk(3)], xmm2   ; xmm2=(-- -- -- -- -- -- -- 15)
+
+       jmp     near .upsample
+       alignx  16,7
+
+.columnloop:
+       ; -- process the next column block
+
+       movdqa  xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD]    ; xmm0=row[ 0][1]
+       movdqa  xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD]    ; xmm1=row[-1][1]
+       movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; xmm2=row[+1][1]
+
+       pushpic ebx
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       pxor      xmm3,xmm3             ; xmm3=(all 0's)
+       movdqa    xmm4,xmm0
+       punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+       movdqa    xmm5,xmm1
+       punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+       movdqa    xmm6,xmm2
+       punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+
+       pmullw  xmm0,[GOTOFF(ebx,PW_THREE)]
+       pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+
+       paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+       paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+       paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+       paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+; The next block's intermediates go just past the current block's 32 bytes of each output row.
+       movdqa  XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1    ; temporarily save
+       movdqa  XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+       movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+       movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6
+; Column 0 of the next block becomes column 16 (right neighbor) of the current block.
+       pslldq  xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- --  0)
+       pslldq  xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- --  0)
+
+       movdqa  XMMWORD [wk(2)], xmm1
+       movdqa  XMMWORD [wk(3)], xmm2
+
+.upsample:
+       ; -- process the upper row
+
+       movdqa  xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD]
+       movdqa  xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD]
+
+       movdqa  xmm0,xmm7               ; xmm7=Int0L=( 0  1  2  3  4  5  6  7)
+       movdqa  xmm4,xmm3               ; xmm3=Int0H=( 8  9 10 11 12 13 14 15)
+       psrldq  xmm0,2                  ; xmm0=( 1  2  3  4  5  6  7 --)
+       pslldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- --  8)
+       movdqa  xmm5,xmm7
+       movdqa  xmm6,xmm3
+       psrldq  xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --)
+       pslldq  xmm6,2                  ; xmm6=(--  8  9 10 11 12 13 14)
+
+       por     xmm0,xmm4               ; xmm0=( 1  2  3  4  5  6  7  8)
+       por     xmm5,xmm6               ; xmm5=( 7  8  9 10 11 12 13 14)
+
+       movdqa  xmm1,xmm7
+       movdqa  xmm2,xmm3
+       pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+       psrldq  xmm2,2                  ; xmm2=( 9 10 11 12 13 14 15 --)
+       movdqa  xmm4,xmm3
+       psrldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --)
+
+       por     xmm1, XMMWORD [wk(0)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+       por     xmm2, XMMWORD [wk(2)]   ; xmm2=( 9 10 11 12 13 14 15 16)
+
+       movdqa  XMMWORD [wk(0)], xmm4   ; column 15 becomes the next block's left neighbor (upper row)
+; Horizontal pass: even = (3*Int + left + 8) >> 4, odd = (3*Int + right + 7) >> 4.
+       pmullw  xmm7,[GOTOFF(ebx,PW_THREE)]
+       pmullw  xmm3,[GOTOFF(ebx,PW_THREE)]
+       paddw   xmm1,[GOTOFF(ebx,PW_EIGHT)]
+       paddw   xmm5,[GOTOFF(ebx,PW_EIGHT)]
+       paddw   xmm0,[GOTOFF(ebx,PW_SEVEN)]
+       paddw   xmm2,[GOTOFF(ebx,PW_SEVEN)]
+
+       paddw   xmm1,xmm7
+       paddw   xmm5,xmm3
+       psrlw   xmm1,4                  ; xmm1=Out0LE=( 0  2  4  6  8 10 12 14)
+       psrlw   xmm5,4                  ; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
+       paddw   xmm0,xmm7
+       paddw   xmm2,xmm3
+       psrlw   xmm0,4                  ; xmm0=Out0LO=( 1  3  5  7  9 11 13 15)
+       psrlw   xmm2,4                  ; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
+; Interleave: odd outputs go to the high byte of each word, even outputs stay in the low byte.
+       psllw   xmm0,BYTE_BIT
+       psllw   xmm2,BYTE_BIT
+       por     xmm1,xmm0               ; xmm1=Out0L=( 0  1  2 ... 13 14 15)
+       por     xmm5,xmm2               ; xmm5=Out0H=(16 17 18 ... 29 30 31)
+; Final samples overwrite the intermediates that were parked in the output row.
+       movdqa  XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1
+       movdqa  XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5
+
+       ; -- process the lower row
+
+       movdqa  xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD]
+       movdqa  xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD]
+
+       movdqa  xmm7,xmm6               ; xmm6=Int1L=( 0  1  2  3  4  5  6  7)
+       movdqa  xmm3,xmm4               ; xmm4=Int1H=( 8  9 10 11 12 13 14 15)
+       psrldq  xmm7,2                  ; xmm7=( 1  2  3  4  5  6  7 --)
+       pslldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- --  8)
+       movdqa  xmm0,xmm6
+       movdqa  xmm2,xmm4
+       psrldq  xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --)
+       pslldq  xmm2,2                  ; xmm2=(--  8  9 10 11 12 13 14)
+
+       por     xmm7,xmm3               ; xmm7=( 1  2  3  4  5  6  7  8)
+       por     xmm0,xmm2               ; xmm0=( 7  8  9 10 11 12 13 14)
+
+       movdqa  xmm1,xmm6
+       movdqa  xmm5,xmm4
+       pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+       psrldq  xmm5,2                  ; xmm5=( 9 10 11 12 13 14 15 --)
+       movdqa  xmm3,xmm4
+       psrldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --)
+
+       por     xmm1, XMMWORD [wk(1)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+       por     xmm5, XMMWORD [wk(3)]   ; xmm5=( 9 10 11 12 13 14 15 16)
+
+       movdqa  XMMWORD [wk(1)], xmm3   ; column 15 becomes the next block's left neighbor (lower row)
+
+       pmullw  xmm6,[GOTOFF(ebx,PW_THREE)]
+       pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+       paddw   xmm1,[GOTOFF(ebx,PW_EIGHT)]
+       paddw   xmm0,[GOTOFF(ebx,PW_EIGHT)]
+       paddw   xmm7,[GOTOFF(ebx,PW_SEVEN)]
+       paddw   xmm5,[GOTOFF(ebx,PW_SEVEN)]
+
+       paddw   xmm1,xmm6
+       paddw   xmm0,xmm4
+       psrlw   xmm1,4                  ; xmm1=Out1LE=( 0  2  4  6  8 10 12 14)
+       psrlw   xmm0,4                  ; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
+       paddw   xmm7,xmm6
+       paddw   xmm5,xmm4
+       psrlw   xmm7,4                  ; xmm7=Out1LO=( 1  3  5  7  9 11 13 15)
+       psrlw   xmm5,4                  ; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
+
+       psllw   xmm7,BYTE_BIT
+       psllw   xmm5,BYTE_BIT
+       por     xmm1,xmm7               ; xmm1=Out1L=( 0  1  2 ... 13 14 15)
+       por     xmm0,xmm5               ; xmm0=Out1H=(16 17 18 ... 29 30 31)
+
+       movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
+       movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
+
+       poppic  ebx
+; 16 input columns consumed per row, 32 output columns produced per output row.
+       sub     eax, byte SIZEOF_XMMWORD
+       add     ecx, byte 1*SIZEOF_XMMWORD      ; inptr1(above)
+       add     ebx, byte 1*SIZEOF_XMMWORD      ; inptr0
+       add     esi, byte 1*SIZEOF_XMMWORD      ; inptr1(below)
+       add     edx, byte 2*SIZEOF_XMMWORD      ; outptr0
+       add     edi, byte 2*SIZEOF_XMMWORD      ; outptr1
+       cmp     eax, byte SIZEOF_XMMWORD
+       ja      near .columnloop        ; at least two blocks remain
+       test    eax,eax
+       jnz     near .columnloop_last   ; exactly one block remains
+
+       pop     esi
+       pop     edi
+       pop     ecx
+       pop     eax
+
+       add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+       sub     ecx, byte 2                     ; rowctr
+       jg      near .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- unaligned frame pointer stored at [aligned ebp]
+       pop     ebp
+       ret
+
+%ifdef UPSAMPLE_H1V2_SUPPORTED
+
+; --------------------------------------------------------------------------
+;
+; Fancy processing for the common case of 1:1 horizontal and 2:1 vertical.
+; Again a triangle filter; see comments for h2v1 case, above.
+;
+; GLOBAL(void)
+; jpeg_h1v2_fancy_upsample_sse2 (j_decompress_ptr cinfo,
+;                                jpeg_component_info * compptr,
+;                                JSAMPARRAY input_data,
+;                                JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+%define gotptr         ebp-SIZEOF_POINTER      ; void * gotptr
+
+       align   16
+       global  EXTN(jpeg_h1v2_fancy_upsample_sse2)
+
+EXTN(jpeg_h1v2_fancy_upsample_sse2):   ; out0=(3*row[0]+row[-1]+1)>>2, out1=(3*row[0]+row[+1]+2)>>2
+       push    ebp
+       mov     ebp,esp
+       pushpic eax             ; make room for the GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+; ebx is reused as inptr0 inside the loop, so the GOT address is parked in the gotptr slot.
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+; colctr is the component width rounded up to a whole number of 16-sample blocks.
+       mov     eax, POINTER [compptr(ebp)]
+       mov     eax, JDIMENSION [jcompinfo_downsampled_width(eax)]  ; colctr
+       add     eax, byte SIZEOF_XMMWORD-1
+       and     eax, byte -SIZEOF_XMMWORD
+       jz      near .return            ; zero-width component: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      near .return            ; no rows requested
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       push    eax                                     ; colctr
+       push    ecx
+       push    edi
+       push    esi
+; One input row (plus the rows above and below it) produces two output rows.
+       mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+       mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+       mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+
+       pxor    xmm0,xmm0               ; xmm0=(all 0's)
+       alignx  16,7
+
+.columnloop:
+       movdqa  xmm1, XMMWORD [ebx]     ; xmm1=row[ 0]( 0  1  2 ... 13 14 15)
+       movdqa  xmm2, XMMWORD [ecx]     ; xmm2=row[-1]( 0  1  2 ... 13 14 15)
+       movdqa  xmm3, XMMWORD [esi]     ; xmm3=row[+1]( 0  1  2 ... 13 14 15)
+; ebx (inptr0) is set aside while the register holds the GOT address.
+       pushpic ebx
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+; Widen bytes to words by interleaving with zero.
+       movdqa    xmm4,xmm1
+       punpcklbw xmm1,xmm0             ; xmm1=row[ 0]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm4,xmm0             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+       movdqa    xmm5,xmm2
+       punpcklbw xmm2,xmm0             ; xmm2=row[-1]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm5,xmm0             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+       movdqa    xmm6,xmm3
+       punpcklbw xmm3,xmm0             ; xmm3=row[+1]( 0  1  2  3  4  5  6  7)
+       punpckhbw xmm6,xmm0             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+; row[0]*3, row[-1] + 1, row[+1] + 2
+       pmullw  xmm1,[GOTOFF(ebx,PW_THREE)]
+       pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+       paddw   xmm2,[GOTOFF(ebx,PW_ONE)]
+       paddw   xmm5,[GOTOFF(ebx,PW_ONE)]
+       paddw   xmm3,[GOTOFF(ebx,PW_TWO)]
+       paddw   xmm6,[GOTOFF(ebx,PW_TWO)]
+
+       paddw   xmm2,xmm1
+       paddw   xmm5,xmm4
+       psrlw   xmm2,2                  ; xmm2=Out0L=( 0  1  2  3  4  5  6  7)
+       psrlw   xmm5,2                  ; xmm5=Out0H=( 8  9 10 11 12 13 14 15)
+       paddw   xmm3,xmm1
+       paddw   xmm6,xmm4
+       psrlw   xmm3,2                  ; xmm3=Out1L=( 0  1  2  3  4  5  6  7)
+       psrlw   xmm6,2                  ; xmm6=Out1H=( 8  9 10 11 12 13 14 15)
+; Narrow the words back to bytes.
+       packuswb  xmm2,xmm5             ; xmm2=Out0=( 0  1  2 ... 13 14 15)
+       packuswb  xmm3,xmm6             ; xmm3=Out1=( 0  1  2 ... 13 14 15)
+
+       movdqa  XMMWORD [edx], xmm2
+       movdqa  XMMWORD [edi], xmm3
+
+       poppic  ebx
+; No horizontal scaling: input and output pointers all advance by one 16-sample block.
+       add     ecx, byte 1*SIZEOF_XMMWORD      ; inptr1(above)
+       add     ebx, byte 1*SIZEOF_XMMWORD      ; inptr0
+       add     esi, byte 1*SIZEOF_XMMWORD      ; inptr1(below)
+       add     edx, byte 1*SIZEOF_XMMWORD      ; outptr0
+       add     edi, byte 1*SIZEOF_XMMWORD      ; outptr1
+       sub     eax, byte SIZEOF_XMMWORD
+       jnz     near .columnloop
+
+       pop     esi
+       pop     edi
+       pop     ecx
+       pop     eax
+
+       add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+       sub     ecx, byte 2                     ; rowctr
+       jg      near .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       poppic  eax             ; remove gotptr
+       pop     ebp
+       ret
+
+%endif ; UPSAMPLE_H1V2_SUPPORTED
+%endif ; JDSAMPLE_FANCY_SSE2_SUPPORTED
+
+%ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED
+
+%ifndef JDSAMPLE_FANCY_SSE2_SUPPORTED
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+%endif
+;
+; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jpeg_h2v1_upsample_sse2 (j_decompress_ptr cinfo,
+;                          jpeg_component_info * compptr,
+;                          JSAMPARRAY input_data,
+;                          JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+       align   16
+       global  EXTN(jpeg_h2v1_upsample_sse2)
+
+EXTN(jpeg_h2v1_upsample_sse2):         ; box filter: every input sample is written twice in a row
+       push    ebp
+       mov     ebp,esp
+;      push    ebx             ; unused
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+; edx = output width rounded up to a multiple of 32 samples (two xmmwords of output).
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jdstruct_output_width(edx)]
+       add     edx, byte (2*SIZEOF_XMMWORD)-1
+       and     edx, byte -(2*SIZEOF_XMMWORD)
+       jz      short .return           ; zero output width: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      short .return           ; no rows requested
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       push    edi
+       push    esi
+
+       mov     esi, JSAMPROW [esi]             ; inptr
+       mov     edi, JSAMPROW [edi]             ; outptr
+       mov     eax,edx                         ; colctr
+       alignx  16,7
+.columnloop:
+; The loop body is unrolled twice: each half turns 16 input samples into 32 output samples.
+       movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+
+       movdqa    xmm1,xmm0
+       punpcklbw xmm0,xmm0             ; duplicate each of the low 8 samples
+       punpckhbw xmm1,xmm1             ; duplicate each of the high 8 samples
+
+       movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+       movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
+
+       sub     eax, byte 2*SIZEOF_XMMWORD
+       jz      short .nextrow          ; row finished after the first half
+
+       movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+       movdqa    xmm3,xmm2
+       punpcklbw xmm2,xmm2
+       punpckhbw xmm3,xmm3
+
+       movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+       movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
+
+       sub     eax, byte 2*SIZEOF_XMMWORD
+       jz      short .nextrow          ; row finished after the second half
+
+       add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+       add     edi, byte 4*SIZEOF_XMMWORD      ; outptr
+       jmp     short .columnloop
+       alignx  16,7
+
+.nextrow:
+       pop     esi
+       pop     edi
+
+       add     esi, byte SIZEOF_JSAMPROW       ; input_data
+       add     edi, byte SIZEOF_JSAMPROW       ; output_data
+       dec     ecx                             ; rowctr
+       jg      short .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+;      pop     ebx             ; unused
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jpeg_h2v2_upsample_sse2 (j_decompress_ptr cinfo,
+;                          jpeg_component_info * compptr,
+;                          JSAMPARRAY input_data,
+;                          JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)               (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)             (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)          (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)     (b)+20          ; JSAMPARRAY * output_data_ptr
+
+       align   16
+       global  EXTN(jpeg_h2v2_upsample_sse2)
+
+EXTN(jpeg_h2v2_upsample_sse2):         ; box filter: every input sample is written twice in each of two output rows
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+; edx = output width rounded up to a multiple of 32 samples (two xmmwords of output).
+       mov     edx, POINTER [cinfo(ebp)]
+       mov     edx, JDIMENSION [jdstruct_output_width(edx)]
+       add     edx, byte (2*SIZEOF_XMMWORD)-1
+       and     edx, byte -(2*SIZEOF_XMMWORD)
+       jz      near .return            ; zero output width: nothing to do
+
+       mov     ecx, POINTER [cinfo(ebp)]
+       mov     ecx, INT [jdstruct_max_v_samp_factor(ecx)]      ; rowctr
+       test    ecx,ecx
+       jz      near .return            ; no rows requested
+
+       mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+       mov     edi, POINTER [output_data_ptr(ebp)]
+       mov     edi, JSAMPARRAY [edi]                   ; output_data
+       alignx  16,7
+.rowloop:
+       push    edi
+       push    esi
+
+       mov     esi, JSAMPROW [esi]                     ; inptr
+       mov     ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+       mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+       mov     eax,edx                                 ; colctr
+       alignx  16,7
+.columnloop:
+; The loop body is unrolled twice: each half turns 16 input samples into 32 samples in both output rows.
+       movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+
+       movdqa    xmm1,xmm0
+       punpcklbw xmm0,xmm0             ; duplicate each of the low 8 samples
+       punpckhbw xmm1,xmm1             ; duplicate each of the high 8 samples
+
+       movdqa  XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0
+       movdqa  XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1
+       movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+       movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
+
+       sub     eax, byte 2*SIZEOF_XMMWORD
+       jz      short .nextrow          ; row finished after the first half
+
+       movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+       movdqa    xmm3,xmm2
+       punpcklbw xmm2,xmm2
+       punpckhbw xmm3,xmm3
+
+       movdqa  XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2
+       movdqa  XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3
+       movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+       movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
+
+       sub     eax, byte 2*SIZEOF_XMMWORD
+       jz      short .nextrow          ; row finished after the second half
+
+       add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+       add     ebx, byte 4*SIZEOF_XMMWORD      ; outptr0
+       add     edi, byte 4*SIZEOF_XMMWORD      ; outptr1
+       jmp     short .columnloop
+       alignx  16,7
+
+.nextrow:
+       pop     esi
+       pop     edi
+; One input row was consumed and two output rows were filled.
+       add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+       add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+       sub     ecx, byte 2                     ; rowctr
+       jg      short .rowloop
+
+.return:
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; JDSAMPLE_SIMPLE_SSE2_SUPPORTED
diff --git a/jf3dnflt.asm b/jf3dnflt.asm
new file mode 100644 (file)
index 0000000..7117dd4
--- /dev/null
@@ -0,0 +1,327 @@
+;
+; jf3dnflt.asm - floating-point FDCT (3DNow!)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_fdct_float_3dnow)
+
+EXTN(jconst_fdct_float_3dnow):
+; Each constant is stored twice so that one pfmul scales both 32-bit lanes.
+PD_0_382       dd  0.382683432365089771728460, 0.382683432365089771728460  ; cos(PI*3/8)
+PD_0_707       dd  0.707106781186547524400844, 0.707106781186547524400844  ; cos(PI*1/4)
+PD_0_541       dd  0.541196100146196984399723, 0.541196100146196984399723  ; cos(PI*1/8)-cos(PI*3/8)
+PD_1_306       dd  1.306562964876376527856643, 1.306562964876376527856643  ; cos(PI*1/8)+cos(PI*3/8)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+; In place on 'data'; each loop iteration does 2 rows (pass 1) / 2 columns (pass 2).
+; GLOBAL(void)
+; jpeg_fdct_float_3dnow (FAST_FLOAT * data)
+;
+
+%define data(b)                (b)+8           ; FAST_FLOAT * data
+
+%define original_ebp   ebp+0           ; slot holding the pre-alignment frame base
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         2               ; wk(0)=tmp6, wk(1)=tmp7
+
+       align   16
+       global  EXTN(jpeg_fdct_float_3dnow)
+
+EXTN(jpeg_fdct_float_3dnow):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp (unaligned frame base)
+       sub     esp, byte 4                     ; room for the saved frame base
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; [original_ebp] = unaligned frame base
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; allocate wk[WK_NUM] below aligned ebp
+       pushpic ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process rows.
+
+       mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE/2                  ; loop counter: two rows per iteration
+       alignx  16,7
+.rowloop:
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+
+       ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17)
+
+       movq      mm4,mm0               ; transpose coefficients
+       punpckldq mm0,mm1               ; mm0=(00 10)=data0
+       punpckhdq mm4,mm1               ; mm4=(01 11)=data1
+       movq      mm5,mm2               ; transpose coefficients
+       punpckldq mm2,mm3               ; mm2=(06 16)=data6
+       punpckhdq mm5,mm3               ; mm5=(07 17)=data7
+
+       movq    mm6,mm4
+       movq    mm7,mm0
+       pfsub   mm4,mm2                 ; mm4=data1-data6=tmp6
+       pfsub   mm0,mm5                 ; mm0=data0-data7=tmp7
+       pfadd   mm6,mm2                 ; mm6=data1+data6=tmp1
+       pfadd   mm7,mm5                 ; mm7=data0+data7=tmp0
+
+       movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+
+       ; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15)
+
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=tmp6
+       movq    MMWORD [wk(1)], mm0     ; wk(1)=tmp7
+
+       movq      mm4,mm1               ; transpose coefficients
+       punpckldq mm1,mm3               ; mm1=(02 12)=data2
+       punpckhdq mm4,mm3               ; mm4=(03 13)=data3
+       movq      mm0,mm2               ; transpose coefficients
+       punpckldq mm2,mm5               ; mm2=(04 14)=data4
+       punpckhdq mm0,mm5               ; mm0=(05 15)=data5
+
+       movq    mm3,mm4
+       movq    mm5,mm1
+       pfadd   mm4,mm2                 ; mm4=data3+data4=tmp3
+       pfadd   mm1,mm0                 ; mm1=data2+data5=tmp2
+       pfsub   mm3,mm2                 ; mm3=data3-data4=tmp4
+       pfsub   mm5,mm0                 ; mm5=data2-data5=tmp5
+
+       ; -- Even part
+
+       movq    mm2,mm7
+       movq    mm0,mm6
+       pfsub   mm7,mm4                 ; mm7=tmp13
+       pfsub   mm6,mm1                 ; mm6=tmp12
+       pfadd   mm2,mm4                 ; mm2=tmp10
+       pfadd   mm0,mm1                 ; mm0=tmp11
+
+       pfadd   mm6,mm7                 ; mm6=tmp12+tmp13
+       pfmul   mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+
+       movq    mm4,mm2
+       movq    mm1,mm7
+       pfsub   mm2,mm0                 ; mm2=data4
+       pfsub   mm7,mm6                 ; mm7=data6
+       pfadd   mm4,mm0                 ; mm4=data0
+       pfadd   mm1,mm6                 ; mm1=data2
+
+       movq    MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2
+       movq    MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7
+       movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+       movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1
+
+       ; -- Odd part
+
+       movq    mm0, MMWORD [wk(0)]     ; mm0=tmp6
+       movq    mm6, MMWORD [wk(1)]     ; mm6=tmp7
+
+       pfadd   mm3,mm5                 ; mm3=tmp10
+       pfadd   mm5,mm0                 ; mm5=tmp11
+       pfadd   mm0,mm6                 ; mm0=tmp12, mm6=tmp7
+
+       pfmul   mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+
+       movq    mm2,mm3                 ; mm2=tmp10
+       pfsub   mm3,mm0                 ; mm3=tmp10-tmp12
+       pfmul   mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+       pfmul   mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+       pfmul   mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+       pfadd   mm2,mm3                 ; mm2=z2
+       pfadd   mm0,mm3                 ; mm0=z4
+
+       movq    mm7,mm6
+       pfsub   mm6,mm5                 ; mm6=z13
+       pfadd   mm7,mm5                 ; mm7=z11
+
+       movq    mm4,mm6
+       movq    mm1,mm7
+       pfsub   mm6,mm2                 ; mm6=data3
+       pfsub   mm7,mm0                 ; mm7=data7
+       pfadd   mm4,mm2                 ; mm4=data5
+       pfadd   mm1,mm0                 ; mm1=data1
+
+       movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6
+       movq    MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7
+       movq    MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4
+       movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+       add     edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; advance pointer by two rows
+       dec     ecx
+       jnz     near .rowloop
+
+       ; ---- Pass 2: process columns.
+
+       mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE/2                  ; loop counter: two columns per iteration
+       alignx  16,7
+.columnloop:
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
+
+       ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71)
+
+       movq      mm4,mm0               ; transpose coefficients
+       punpckldq mm0,mm1               ; mm0=(00 01)=data0
+       punpckhdq mm4,mm1               ; mm4=(10 11)=data1
+       movq      mm5,mm2               ; transpose coefficients
+       punpckldq mm2,mm3               ; mm2=(60 61)=data6
+       punpckhdq mm5,mm3               ; mm5=(70 71)=data7
+
+       movq    mm6,mm4
+       movq    mm7,mm0
+       pfsub   mm4,mm2                 ; mm4=data1-data6=tmp6
+       pfsub   mm0,mm5                 ; mm0=data0-data7=tmp7
+       pfadd   mm6,mm2                 ; mm6=data1+data6=tmp1
+       pfadd   mm7,mm5                 ; mm7=data0+data7=tmp0
+
+       movq    mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
+       movq    mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
+
+       ; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51)
+
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=tmp6
+       movq    MMWORD [wk(1)], mm0     ; wk(1)=tmp7
+
+       movq      mm4,mm1               ; transpose coefficients
+       punpckldq mm1,mm3               ; mm1=(20 21)=data2
+       punpckhdq mm4,mm3               ; mm4=(30 31)=data3
+       movq      mm0,mm2               ; transpose coefficients
+       punpckldq mm2,mm5               ; mm2=(40 41)=data4
+       punpckhdq mm0,mm5               ; mm0=(50 51)=data5
+
+       movq    mm3,mm4
+       movq    mm5,mm1
+       pfadd   mm4,mm2                 ; mm4=data3+data4=tmp3
+       pfadd   mm1,mm0                 ; mm1=data2+data5=tmp2
+       pfsub   mm3,mm2                 ; mm3=data3-data4=tmp4
+       pfsub   mm5,mm0                 ; mm5=data2-data5=tmp5
+
+       ; -- Even part
+
+       movq    mm2,mm7
+       movq    mm0,mm6
+       pfsub   mm7,mm4                 ; mm7=tmp13
+       pfsub   mm6,mm1                 ; mm6=tmp12
+       pfadd   mm2,mm4                 ; mm2=tmp10
+       pfadd   mm0,mm1                 ; mm0=tmp11
+
+       pfadd   mm6,mm7                 ; mm6=tmp12+tmp13
+       pfmul   mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+
+       movq    mm4,mm2
+       movq    mm1,mm7
+       pfsub   mm2,mm0                 ; mm2=data4
+       pfsub   mm7,mm6                 ; mm7=data6
+       pfadd   mm4,mm0                 ; mm4=data0
+       pfadd   mm1,mm6                 ; mm1=data2
+
+       movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2
+       movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7
+       movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+       movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+       ; -- Odd part
+
+       movq    mm0, MMWORD [wk(0)]     ; mm0=tmp6
+       movq    mm6, MMWORD [wk(1)]     ; mm6=tmp7
+
+       pfadd   mm3,mm5                 ; mm3=tmp10
+       pfadd   mm5,mm0                 ; mm5=tmp11
+       pfadd   mm0,mm6                 ; mm0=tmp12, mm6=tmp7
+
+       pfmul   mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+
+       movq    mm2,mm3                 ; mm2=tmp10
+       pfsub   mm3,mm0                 ; mm3=tmp10-tmp12
+       pfmul   mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+       pfmul   mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+       pfmul   mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+       pfadd   mm2,mm3                 ; mm2=z2
+       pfadd   mm0,mm3                 ; mm0=z4
+
+       movq    mm7,mm6
+       pfsub   mm6,mm5                 ; mm6=z13
+       pfadd   mm7,mm5                 ; mm7=z11
+
+       movq    mm4,mm6
+       movq    mm1,mm7
+       pfsub   mm6,mm2                 ; mm6=data3
+       pfsub   mm7,mm0                 ; mm7=data7
+       pfadd   mm4,mm2                 ; mm4=data5
+       pfadd   mm1,mm0                 ; mm1=data1
+
+       movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6
+       movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7
+       movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4
+       movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+       add     edx, byte 2*SIZEOF_FAST_FLOAT   ; advance pointer by two columns
+       dec     ecx
+       jnz     near .columnloop
+
+       femms           ; empty MMX/3DNow! state
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JFDCT_FLT_3DNOW_MMX_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jfdctflt.asm b/jfdctflt.asm
new file mode 100644 (file)
index 0000000..178e1f9
--- /dev/null
@@ -0,0 +1,288 @@
+;
+; jfdctflt.asm - floating-point FDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+%define ROTATOR_TYPE   FP32    ; float; size keyword put on the fmul operands that read F_*
+
+       alignz  16              ; NOTE(review): project macro, presumably pads to 16 bytes -- confirm in jsimdext.inc
+       global  EXTN(jconst_fdct_float)
+
+EXTN(jconst_fdct_float):
+
+F_0_382        dd      0.382683432365089771728460      ; cos(PI*3/8)
+F_0_707        dd      0.707106781186547524400844      ; cos(PI*1/4)
+F_0_541        dd      0.541196100146196984399723      ; cos(PI*1/8)-cos(PI*3/8)
+F_1_306        dd      1.306562964876376527856643      ; cos(PI*1/8)+cos(PI*3/8)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+; Both passes read from and write back to the buffer at 'data' (in place).
+; GLOBAL(void)
+; jpeg_fdct_float (FAST_FLOAT * data)
+;
+
+%define data(b)        (b)+8           ; FAST_FLOAT * data
+
+       align   16
+       global  EXTN(jpeg_fdct_float)
+
+EXTN(jpeg_fdct_float):
+       push    ebp
+       mov     ebp,esp
+       pushpic ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process rows.
+
+       mov     edx, POINTER [data(ebp)]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE                    ; loop counter: one row per iteration
+       alignx  16,7
+.rowloop:
+       fld     FAST_FLOAT [ROW(1,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [ROW(6,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data1+data6 (tmp1)
+       fld     FAST_FLOAT [ROW(0,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [ROW(7,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data0+data7 (tmp0)
+       fld     FAST_FLOAT [ROW(3,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [ROW(4,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data3+data4 (tmp3)
+       fld     FAST_FLOAT [ROW(2,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [ROW(5,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data2+data5 (tmp2)
+
+       ; -- Even part
+
+       fld     st2     ; st2 = st2 + st1, st1 = st2 - st1
+       fsub    st0,st2
+       fxch    st0,st2
+       faddp   st3,st0
+       fld     st3     ; st3 = st3 + st0, st0 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st4,st0
+
+       fadd    st0,st1
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+
+       fld     st2     ; st3 = st2 + st3, st2 = st2 - st3
+       fsub    st0,st4
+       fxch    st0,st3
+       faddp   st4,st0
+       fld     st1     ; st0 = st1 + st0, st1 = st1 - st0
+       fsub    st0,st1
+       fxch    st0,st2
+       faddp   st1,st0
+
+       fld     FAST_FLOAT [ROW(0,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [ROW(7,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data0-data7 (tmp7)
+       fxch    st0,st4
+       fld     FAST_FLOAT [ROW(3,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [ROW(4,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data3-data4 (tmp4)
+       fxch    st0,st4
+       fld     FAST_FLOAT [ROW(1,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [ROW(6,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data1-data6 (tmp6)
+       fxch    st0,st4
+       fld     FAST_FLOAT [ROW(2,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [ROW(5,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data2-data5 (tmp5)
+       fxch    st0,st4
+
+       fstp    FAST_FLOAT [ROW(2,edx,SIZEOF_FAST_FLOAT)]       ; store the even-indexed outputs
+       fstp    FAST_FLOAT [ROW(6,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [ROW(4,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [ROW(0,edx,SIZEOF_FAST_FLOAT)]
+
+       ; -- Odd part
+
+       fadd    st2,st0
+       fadd    st0,st1
+       fxch    st0,st3
+       fadd    st1,st0
+       fxch    st0,st3
+
+       fld     st2
+       fxch    st0,st1
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+       fxch    st0,st1
+       fsub    st0,st2
+       fxch    st0,st3
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_541)]
+       fxch    st0,st3
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_382)]
+       fxch    st0,st2
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_306)]
+       fxch    st0,st2
+       fadd    st3,st0
+       faddp   st2,st0
+
+       fld     st3     ; st3 = st3 + st0, st0 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st4,st0
+
+       fld     st2     ; st0 = st0 + st2, st2 = st0 - st2
+       fsubr   st0,st1
+       fxch    st0,st3
+       faddp   st1,st0
+       fld     st1     ; st3 = st3 + st1, st1 = st3 - st1
+       fsubr   st0,st4
+       fxch    st0,st2
+       faddp   st4,st0
+
+       fstp    FAST_FLOAT [ROW(5,edx,SIZEOF_FAST_FLOAT)]       ; store the odd-indexed outputs
+       fstp    FAST_FLOAT [ROW(7,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [ROW(3,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [ROW(1,edx,SIZEOF_FAST_FLOAT)]
+
+       add     edx, byte DCTSIZE*SIZEOF_FAST_FLOAT     ; advance pointer to next row
+       dec     ecx                             ; count down remaining rows
+       jnz     near .rowloop
+
+       ; ---- Pass 2: process columns.
+
+       mov     edx, POINTER [data(ebp)]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE                    ; loop counter: one column per iteration
+       alignx  16,7
+.columnloop:
+       fld     FAST_FLOAT [COL(1,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [COL(6,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data1+data6 (tmp1)
+       fld     FAST_FLOAT [COL(0,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [COL(7,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data0+data7 (tmp0)
+       fld     FAST_FLOAT [COL(3,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [COL(4,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data3+data4 (tmp3)
+       fld     FAST_FLOAT [COL(2,edx,SIZEOF_FAST_FLOAT)]
+       fadd    FAST_FLOAT [COL(5,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data2+data5 (tmp2)
+
+       ; -- Even part
+
+       fld     st2     ; st2 = st2 + st1, st1 = st2 - st1
+       fsub    st0,st2
+       fxch    st0,st2
+       faddp   st3,st0
+       fld     st3     ; st3 = st3 + st0, st0 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st4,st0
+
+       fadd    st0,st1
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+
+       fld     st2     ; st3 = st2 + st3, st2 = st2 - st3
+       fsub    st0,st4
+       fxch    st0,st3
+       faddp   st4,st0
+       fld     st1     ; st0 = st1 + st0, st1 = st1 - st0
+       fsub    st0,st1
+       fxch    st0,st2
+       faddp   st1,st0
+
+       fld     FAST_FLOAT [COL(0,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [COL(7,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data0-data7 (tmp7)
+       fxch    st0,st4
+       fld     FAST_FLOAT [COL(3,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [COL(4,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data3-data4 (tmp4)
+       fxch    st0,st4
+       fld     FAST_FLOAT [COL(1,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [COL(6,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data1-data6 (tmp6)
+       fxch    st0,st4
+       fld     FAST_FLOAT [COL(2,edx,SIZEOF_FAST_FLOAT)]
+       fsub    FAST_FLOAT [COL(5,edx,SIZEOF_FAST_FLOAT)]       ; st0 = data2-data5 (tmp5)
+       fxch    st0,st4
+
+       fstp    FAST_FLOAT [COL(2,edx,SIZEOF_FAST_FLOAT)]       ; store the even-indexed outputs
+       fstp    FAST_FLOAT [COL(6,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(4,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(0,edx,SIZEOF_FAST_FLOAT)]
+
+       ; -- Odd part
+
+       fadd    st2,st0
+       fadd    st0,st1
+       fxch    st0,st3
+       fadd    st1,st0
+       fxch    st0,st3
+
+       fld     st2
+       fxch    st0,st1
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+       fxch    st0,st1
+       fsub    st0,st2
+       fxch    st0,st3
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_541)]
+       fxch    st0,st3
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_0_382)]
+       fxch    st0,st2
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_306)]
+       fxch    st0,st2
+       fadd    st3,st0
+       faddp   st2,st0
+
+       fld     st3     ; st3 = st3 + st0, st0 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st4,st0
+
+       fld     st2     ; st0 = st0 + st2, st2 = st0 - st2
+       fsubr   st0,st1
+       fxch    st0,st3
+       faddp   st1,st0
+       fld     st1     ; st3 = st3 + st1, st1 = st3 - st1
+       fsubr   st0,st4
+       fxch    st0,st2
+       faddp   st4,st0
+
+       fstp    FAST_FLOAT [COL(5,edx,SIZEOF_FAST_FLOAT)]       ; store the odd-indexed outputs
+       fstp    FAST_FLOAT [COL(7,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(3,edx,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(1,edx,SIZEOF_FAST_FLOAT)]
+
+       add     edx, byte SIZEOF_FAST_FLOAT ; advance pointer to next column
+       dec     ecx
+       jnz     near .columnloop
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx
+       pop     ebp
+       ret
+
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jfdctfst.asm b/jfdctfst.asm
new file mode 100644 (file)
index 0000000..c73c920
--- /dev/null
@@ -0,0 +1,303 @@
+;
+; jfdctfst.asm - fast integer FDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctfst.c; see the jfdctfst.c for
+; more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; descale reg32, nbits -- arithmetic right shift of reg32 by nbits.
+; The rounding bias 1<<(nbits-1) is added first only when
+; USE_ACCURATE_ROUNDING is defined; without it the code is a little faster
+; but the result is incorrectly rounded half the time.
+%macro descale 2
+%ifdef USE_ACCURATE_ROUNDING
+%if (%2)>7
+       add     %1, (1<<((%2)-1))       ; bias needs the imm32 form
+%else
+       add     %1, byte (1<<((%2)-1))  ; bias fits the signed imm8 form
+%endif
+%endif
+       sar     %1,%2
+%endmacro
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     8       ; fractional bits of the F_* fixed-point multipliers
+
+%if CONST_BITS == 8
+F_0_382        equ      98             ; FIX(0.382683433)
+F_0_541        equ     139             ; FIX(0.541196100)
+F_0_707        equ     181             ; FIX(0.707106781)
+F_1_306        equ     334             ; FIX(1.306562965)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; x/2^n rounded; x below is pre-scaled by 2^30
+F_0_382        equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707        equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306        equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+; Both passes read from and write back to the buffer at 'data' (in place).
+; GLOBAL(void)
+; jpeg_fdct_ifast (DCTELEM * data)
+;
+
+%define data(b)        (b)+8           ; DCTELEM * data
+
+       align   16
+       global  EXTN(jpeg_fdct_ifast)
+
+EXTN(jpeg_fdct_ifast):
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; ---- Pass 1: process rows.
+
+       mov     ecx, DCTSIZE
+       mov     edx, POINTER [data(ebp)]        ; (DCTELEM *)
+       alignx  16,7
+.rowloop:
+       push    ecx             ; ctr
+       push    edx             ; dataptr
+
+       movsx   eax, DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)]
+       movsx   edi, DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)]
+       lea     esi,[eax+edi]   ; esi=tmp0
+       sub     eax,edi         ; eax=tmp7
+       push    eax             ; save tmp7 for the odd part
+
+       movsx   ebx, DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)]
+       lea     edi,[ebx+ecx]   ; edi=tmp1
+       sub     ebx,ecx         ; ebx=tmp6
+       push    ebx             ; save tmp6 for the odd part
+
+       movsx   eax, DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)]
+       lea     ebx,[eax+ecx]   ; ebx=tmp2
+       sub     eax,ecx         ; eax=tmp5
+       push    eax             ; save tmp5 for the odd part
+
+       movsx   ecx, DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)]
+       movsx   eax, DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)]
+       lea     edx,[ecx+eax]   ; edx=tmp3
+       sub     ecx,eax         ; ecx=tmp4
+       push    ecx             ; save tmp4 for the odd part
+
+       ; -- Even part
+
+       lea     eax,[esi+edx]   ; eax=tmp10
+       lea     ecx,[edi+ebx]   ; ecx=tmp11
+       sub     esi,edx         ; esi=tmp13
+       sub     edi,ebx         ; edi=tmp12
+
+       mov     edx, POINTER [esp+16]   ; dataptr (sits above the 4 saved temps)
+
+       add     edi,esi
+       imul    edi,(F_0_707)   ; edi=z1
+       descale edi,CONST_BITS
+
+       lea     ebx,[eax+ecx]   ; ebx=data0
+       sub     eax,ecx         ; eax=data4
+       mov     DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)], bx
+       mov     DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)], ax
+
+       lea     ecx,[esi+edi]   ; ecx=data2
+       sub     esi,edi         ; esi=data6
+       mov     DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)], cx
+       mov     DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)], si
+
+       ; -- Odd part
+
+       pop     eax     ; eax=tmp4
+       pop     edx     ; edx=tmp5
+       pop     ebx     ; ebx=tmp6
+       pop     edi     ; edi=tmp7
+
+       add     eax,edx         ; eax=tmp10
+       add     edx,ebx         ; edx=tmp11
+       add     ebx,edi         ; ebx=tmp12, edi=tmp7
+
+       imul    edx,(F_0_707)   ; edx=z3
+       descale edx,CONST_BITS
+       lea     esi,[edi+edx]   ; esi=z11
+       sub     edi,edx         ; edi=z13
+
+       mov     ecx,eax         ; ecx=tmp10
+       sub     eax,ebx         ; eax=tmp10-tmp12
+       imul    eax,(F_0_382)   ; eax=z5
+       imul    ecx,(F_0_541)   ; ecx=MULTIPLY(tmp10,FIX_0_541196100)
+       imul    ebx,(F_1_306)   ; ebx=MULTIPLY(tmp12,FIX_1_306562965)
+       descale eax,CONST_BITS
+       descale ecx,CONST_BITS
+       descale ebx,CONST_BITS
+       add     ecx,eax         ; ecx=z2
+       add     ebx,eax         ; ebx=z4
+
+       pop     edx             ; dataptr
+
+       lea     eax,[edi+ecx]   ; eax=data5
+       sub     edi,ecx         ; edi=data3
+       mov     DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)], ax
+       mov     DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)], di
+
+       lea     ecx,[esi+ebx]   ; ecx=data1
+       sub     esi,ebx         ; esi=data7
+       mov     DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)], cx
+       mov     DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)], si
+
+       pop     ecx             ; ctr
+
+       add     edx, byte DCTSIZE*SIZEOF_DCTELEM        ; advance pointer to next row
+       dec     ecx                     ; count down remaining rows
+       jnz     near .rowloop
+
+       ; ---- Pass 2: process columns.
+
+       mov     ecx, DCTSIZE
+       mov     edx, POINTER [data(ebp)]        ; (DCTELEM *)
+       alignx  16,7
+.columnloop:
+       push    ecx             ; ctr
+       push    edx             ; dataptr
+
+       movsx   eax, DCTELEM [COL(0,edx,SIZEOF_DCTELEM)]
+       movsx   edi, DCTELEM [COL(7,edx,SIZEOF_DCTELEM)]
+       lea     esi,[eax+edi]   ; esi=tmp0
+       sub     eax,edi         ; eax=tmp7
+       push    eax             ; save tmp7 for the odd part
+
+       movsx   ebx, DCTELEM [COL(1,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [COL(6,edx,SIZEOF_DCTELEM)]
+       lea     edi,[ebx+ecx]   ; edi=tmp1
+       sub     ebx,ecx         ; ebx=tmp6
+       push    ebx             ; save tmp6 for the odd part
+
+       movsx   eax, DCTELEM [COL(2,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [COL(5,edx,SIZEOF_DCTELEM)]
+       lea     ebx,[eax+ecx]   ; ebx=tmp2
+       sub     eax,ecx         ; eax=tmp5
+       push    eax             ; save tmp5 for the odd part
+
+       movsx   ecx, DCTELEM [COL(3,edx,SIZEOF_DCTELEM)]
+       movsx   eax, DCTELEM [COL(4,edx,SIZEOF_DCTELEM)]
+       lea     edx,[ecx+eax]   ; edx=tmp3
+       sub     ecx,eax         ; ecx=tmp4
+       push    ecx             ; save tmp4 for the odd part
+
+       ; -- Even part
+
+       lea     eax,[esi+edx]   ; eax=tmp10
+       lea     ecx,[edi+ebx]   ; ecx=tmp11
+       sub     esi,edx         ; esi=tmp13
+       sub     edi,ebx         ; edi=tmp12
+
+       mov     edx, POINTER [esp+16]   ; dataptr (sits above the 4 saved temps)
+
+       add     edi,esi
+       imul    edi,(F_0_707)   ; edi=z1
+       descale edi,CONST_BITS
+
+       lea     ebx,[eax+ecx]   ; ebx=data0
+       sub     eax,ecx         ; eax=data4
+       mov     DCTELEM [COL(0,edx,SIZEOF_DCTELEM)], bx
+       mov     DCTELEM [COL(4,edx,SIZEOF_DCTELEM)], ax
+
+       lea     ecx,[esi+edi]   ; ecx=data2
+       sub     esi,edi         ; esi=data6
+       mov     DCTELEM [COL(2,edx,SIZEOF_DCTELEM)], cx
+       mov     DCTELEM [COL(6,edx,SIZEOF_DCTELEM)], si
+
+       ; -- Odd part
+
+       pop     eax     ; eax=tmp4
+       pop     edx     ; edx=tmp5
+       pop     ebx     ; ebx=tmp6
+       pop     edi     ; edi=tmp7
+
+       add     eax,edx         ; eax=tmp10
+       add     edx,ebx         ; edx=tmp11
+       add     ebx,edi         ; ebx=tmp12, edi=tmp7
+
+       imul    edx,(F_0_707)   ; edx=z3
+       descale edx,CONST_BITS
+       lea     esi,[edi+edx]   ; esi=z11
+       sub     edi,edx         ; edi=z13
+
+       mov     ecx,eax         ; ecx=tmp10
+       sub     eax,ebx         ; eax=tmp10-tmp12
+       imul    eax,(F_0_382)   ; eax=z5
+       imul    ecx,(F_0_541)   ; ecx=MULTIPLY(tmp10,FIX_0_541196100)
+       imul    ebx,(F_1_306)   ; ebx=MULTIPLY(tmp12,FIX_1_306562965)
+       descale eax,CONST_BITS
+       descale ecx,CONST_BITS
+       descale ebx,CONST_BITS
+       add     ecx,eax         ; ecx=z2
+       add     ebx,eax         ; ebx=z4
+
+       pop     edx             ; dataptr
+
+       lea     eax,[edi+ecx]   ; eax=data5
+       sub     edi,ecx         ; edi=data3
+       mov     DCTELEM [COL(5,edx,SIZEOF_DCTELEM)], ax
+       mov     DCTELEM [COL(3,edx,SIZEOF_DCTELEM)], di
+
+       lea     ecx,[esi+ebx]   ; ecx=data1
+       sub     esi,ebx         ; esi=data7
+       mov     DCTELEM [COL(1,edx,SIZEOF_DCTELEM)], cx
+       mov     DCTELEM [COL(7,edx,SIZEOF_DCTELEM)], si
+
+       pop     ecx             ; ctr
+
+       add     edx, byte SIZEOF_DCTELEM    ; advance pointer to next column
+       dec     ecx
+       jnz     near .columnloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; DCT_IFAST_SUPPORTED
diff --git a/jfdctint.asm b/jfdctint.asm
new file mode 100644 (file)
index 0000000..0f29725
--- /dev/null
@@ -0,0 +1,342 @@
+;
+; jfdctint.asm - accurate integer FDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctint.c; see the jfdctint.c for
+; more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_ISLOW_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; Descale and correctly round a DWORD value that's scaled by N bits.
+;
+; Usage: descale <reg32>, <N>
+;   %1 = operand holding the scaled 32-bit value (modified in place)
+;   %2 = N, the number of fraction bits to remove
+; The macro adds the rounding constant 1<<(N-1) and then shifts the
+; result right arithmetically by N bits.
+;
+%macro descale 2
+%if (%2)<=7
+       add     %1, byte (1<<((%2)-1))  ; add reg32,imm8  (1<<(N-1) is at most 64 here)
+%else
+       add     %1, (1<<((%2)-1))       ; add reg32,imm32
+%endif
+       sar     %1,%2                   ; signed shift, so negative values stay negative
+%endmacro
+
+; --------------------------------------------------------------------------
+
+; Fixed-point multiplier constants.  FIX(x) denotes x scaled by
+; 2^CONST_BITS and rounded to the nearest integer.  PASS1_BITS is the
+; extra scaling that pass 1 leaves in its output and pass 2 removes.
+%define CONST_BITS     13
+%define PASS1_BITS     2
+
+%if CONST_BITS == 13
+F_0_298        equ      2446           ; FIX(0.298631336)
+F_0_390        equ      3196           ; FIX(0.390180644)
+F_0_541        equ      4433           ; FIX(0.541196100)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_175        equ      9633           ; FIX(1.175875602)
+F_1_501        equ     12299           ; FIX(1.501321110)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_1_961        equ     16069           ; FIX(1.961570560)
+F_2_053        equ     16819           ; FIX(2.053119869)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_072        equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; The integer literals below are the same constants scaled by 2^30;
+; DESCALE rounds them down to CONST_BITS fraction bits.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_298        equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390        equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175        equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501        equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961        equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053        equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072        equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_islow (DCTELEM * data)
+;
+
+%define data(b)        (b)+8           ; DCTELEM * data
+
+       align   16
+       global  EXTN(jpeg_fdct_islow)
+
+EXTN(jpeg_fdct_islow):
+       ; In-place forward DCT of the block addressed by the single stack
+       ; argument data.  Pass 1 transforms each row and leaves the results
+       ; scaled up by PASS1_BITS; pass 2 transforms each column and removes
+       ; that extra scaling.  ebx, esi and edi are saved here and restored
+       ; before returning; eax, ecx and edx are clobbered.
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; ---- Pass 1: process rows.
+
+       mov     edx, POINTER [data(ebp)]        ; (DCTELEM *)
+       mov     ecx, DCTSIZE                    ; ctr
+       alignx  16,7
+.rowloop:
+       movsx   eax, DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)]
+       movsx   edi, DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)]
+       lea     esi,[eax+edi]   ; esi=tmp0
+       sub     eax,edi         ; eax=tmp7
+       push    ecx             ; ctr
+       push    eax             ; save tmp7
+
+       movsx   ebx, DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)]
+       lea     edi,[ebx+ecx]   ; edi=tmp1
+       sub     ebx,ecx         ; ebx=tmp6
+       push    ebx             ; save tmp6
+
+       movsx   eax, DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)]
+       lea     ebx,[eax+ecx]   ; ebx=tmp2
+       sub     eax,ecx         ; eax=tmp5
+       push    edx             ; dataptr
+       push    eax             ; save tmp5
+
+       movsx   ecx, DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)]
+       movsx   eax, DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)]
+       lea     edx,[ecx+eax]   ; edx=tmp3
+       sub     ecx,eax         ; ecx=tmp4
+       push    ecx             ; save tmp4
+
+       ; Stack layout from here until the pops in the odd part:
+       ;   [esp]=tmp4  [esp+4]=tmp5  [esp+8]=dataptr
+       ;   [esp+12]=tmp6  [esp+16]=tmp7  [esp+20]=ctr
+
+       ; -- Even part
+
+       lea     eax,[esi+edx]   ; eax=tmp10
+       lea     ecx,[edi+ebx]   ; ecx=tmp11
+       sub     esi,edx         ; esi=tmp13
+       sub     edi,ebx         ; edi=tmp12
+
+       lea     ebx,[eax+ecx]   ; ebx=data0
+       sub     eax,ecx         ; eax=data4
+       mov     edx, POINTER [esp+8]    ; dataptr
+       sal     ebx, PASS1_BITS         ; scale up by PASS1_BITS
+       sal     eax, PASS1_BITS         ; scale up by PASS1_BITS
+       mov     DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)], bx
+       mov     DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)], ax
+
+       lea     ecx,[edi+esi]   ; ecx=tmp12+tmp13
+       imul    ecx,(F_0_541)   ; ecx=z1
+       imul    esi,(F_0_765)   ; esi=MULTIPLY(tmp13,FIX_0_765366865)
+       imul    edi,(-F_1_847)  ; edi=MULTIPLY(tmp12,-FIX_1_847759065)
+       add     esi,ecx         ; esi=data2
+       add     edi,ecx         ; edi=data6
+       descale esi,(CONST_BITS-PASS1_BITS)
+       descale edi,(CONST_BITS-PASS1_BITS)
+       mov     DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)], si
+       mov     DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)], di
+
+       ; -- Odd part
+
+       mov     eax, INT32 [esp]        ; eax=tmp4
+       mov     ebx, INT32 [esp+4]      ; ebx=tmp5
+       mov     ecx, INT32 [esp+12]     ; ecx=tmp6
+       mov     esi, INT32 [esp+16]     ; esi=tmp7
+
+       lea     edx,[eax+ecx]   ; edx=z3
+       lea     edi,[ebx+esi]   ; edi=z4
+       add     eax,esi         ; eax=z1
+       add     ebx,ecx         ; ebx=z2
+
+       lea     esi,[edx+edi]   ; esi=z3+z4
+       imul    esi,(F_1_175)   ; esi=z5
+
+       imul    edx,(-F_1_961)  ; edx=z3(=MULTIPLY(z3,-FIX_1_961570560))
+       imul    edi,(-F_0_390)  ; edi=z4(=MULTIPLY(z4,-FIX_0_390180644))
+       imul    eax,(-F_0_899)  ; eax=z1(=MULTIPLY(z1,-FIX_0_899976223))
+       imul    ebx,(-F_2_562)  ; ebx=z2(=MULTIPLY(z2,-FIX_2_562915447))
+
+       add     edx,esi         ; edx=z3(=z3+z5)
+       add     edi,esi         ; edi=z4(=z4+z5)
+
+       lea     ecx,[eax+edx]   ; ecx=z1+z3
+       lea     esi,[ebx+edi]   ; esi=z2+z4
+       add     eax,edi         ; eax=z1+z4
+       add     ebx,edx         ; ebx=z2+z3
+
+       pop     edx             ; edx=tmp4
+       pop     edi             ; edi=tmp5
+       imul    edx,(F_0_298)   ; edx=tmp4(=MULTIPLY(tmp4,FIX_0_298631336))
+       imul    edi,(F_2_053)   ; edi=tmp5(=MULTIPLY(tmp5,FIX_2_053119869))
+       add     ecx,edx         ; ecx=data7(=tmp4+z1+z3)
+       add     esi,edi         ; esi=data5(=tmp5+z2+z4)
+       pop     edx             ; dataptr
+       descale ecx,(CONST_BITS-PASS1_BITS)
+       descale esi,(CONST_BITS-PASS1_BITS)
+       mov     DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)], cx
+       mov     DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)], si
+
+       pop     edi             ; edi=tmp6
+       pop     ecx             ; ecx=tmp7
+       imul    edi,(F_3_072)   ; edi=tmp6(=MULTIPLY(tmp6,FIX_3_072711026))
+       imul    ecx,(F_1_501)   ; ecx=tmp7(=MULTIPLY(tmp7,FIX_1_501321110))
+       add     ebx,edi         ; ebx=data3(=tmp6+z2+z3)
+       add     eax,ecx         ; eax=data1(=tmp7+z1+z4)
+       pop     ecx             ; ctr
+       descale ebx,(CONST_BITS-PASS1_BITS)
+       descale eax,(CONST_BITS-PASS1_BITS)
+       mov     DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)], bx
+       mov     DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)], ax
+
+       add     edx, byte DCTSIZE*SIZEOF_DCTELEM        ; advance pointer to next row
+       dec     ecx
+       jnz     near .rowloop
+
+       ; ---- Pass 2: process columns.
+
+       mov     edx, POINTER [data(ebp)]        ; (DCTELEM *)
+       mov     ecx, DCTSIZE                    ; ctr
+       alignx  16,7
+.columnloop:
+       movsx   eax, DCTELEM [COL(0,edx,SIZEOF_DCTELEM)]
+       movsx   edi, DCTELEM [COL(7,edx,SIZEOF_DCTELEM)]
+       lea     esi,[eax+edi]   ; esi=tmp0
+       sub     eax,edi         ; eax=tmp7
+       push    ecx             ; ctr
+       push    eax             ; save tmp7
+
+       movsx   ebx, DCTELEM [COL(1,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [COL(6,edx,SIZEOF_DCTELEM)]
+       lea     edi,[ebx+ecx]   ; edi=tmp1
+       sub     ebx,ecx         ; ebx=tmp6
+       push    ebx             ; save tmp6
+
+       movsx   eax, DCTELEM [COL(2,edx,SIZEOF_DCTELEM)]
+       movsx   ecx, DCTELEM [COL(5,edx,SIZEOF_DCTELEM)]
+       lea     ebx,[eax+ecx]   ; ebx=tmp2
+       sub     eax,ecx         ; eax=tmp5
+       push    edx             ; dataptr
+       push    eax             ; save tmp5
+
+       movsx   ecx, DCTELEM [COL(3,edx,SIZEOF_DCTELEM)]
+       movsx   eax, DCTELEM [COL(4,edx,SIZEOF_DCTELEM)]
+       lea     edx,[ecx+eax]   ; edx=tmp3
+       sub     ecx,eax         ; ecx=tmp4
+       push    ecx             ; save tmp4
+
+       ; Stack layout from here until the pops in the odd part:
+       ;   [esp]=tmp4  [esp+4]=tmp5  [esp+8]=dataptr
+       ;   [esp+12]=tmp6  [esp+16]=tmp7  [esp+20]=ctr
+
+       ; -- Even part
+
+       lea     eax,[esi+edx]   ; eax=tmp10
+       lea     ecx,[edi+ebx]   ; ecx=tmp11
+       sub     esi,edx         ; esi=tmp13
+       sub     edi,ebx         ; edi=tmp12
+
+       lea     ebx,[eax+ecx]   ; ebx=data0
+       sub     eax,ecx         ; eax=data4
+       mov     edx, POINTER [esp+8]    ; dataptr
+       descale ebx, PASS1_BITS         ; remove the pass 1 scaling
+       descale eax, PASS1_BITS         ; remove the pass 1 scaling
+       mov     DCTELEM [COL(0,edx,SIZEOF_DCTELEM)], bx
+       mov     DCTELEM [COL(4,edx,SIZEOF_DCTELEM)], ax
+
+       lea     ecx,[edi+esi]   ; ecx=tmp12+tmp13
+       imul    ecx,(F_0_541)   ; ecx=z1
+       imul    esi,(F_0_765)   ; esi=MULTIPLY(tmp13,FIX_0_765366865)
+       imul    edi,(-F_1_847)  ; edi=MULTIPLY(tmp12,-FIX_1_847759065)
+       add     esi,ecx         ; esi=data2
+       add     edi,ecx         ; edi=data6
+       descale esi,(CONST_BITS+PASS1_BITS)
+       descale edi,(CONST_BITS+PASS1_BITS)
+       mov     DCTELEM [COL(2,edx,SIZEOF_DCTELEM)], si
+       mov     DCTELEM [COL(6,edx,SIZEOF_DCTELEM)], di
+
+       ; -- Odd part
+
+       mov     eax, INT32 [esp]        ; eax=tmp4
+       mov     ebx, INT32 [esp+4]      ; ebx=tmp5
+       mov     ecx, INT32 [esp+12]     ; ecx=tmp6
+       mov     esi, INT32 [esp+16]     ; esi=tmp7
+
+       lea     edx,[eax+ecx]   ; edx=z3
+       lea     edi,[ebx+esi]   ; edi=z4
+       add     eax,esi         ; eax=z1
+       add     ebx,ecx         ; ebx=z2
+
+       lea     esi,[edx+edi]   ; esi=z3+z4
+       imul    esi,(F_1_175)   ; esi=z5
+
+       imul    edx,(-F_1_961)  ; edx=z3(=MULTIPLY(z3,-FIX_1_961570560))
+       imul    edi,(-F_0_390)  ; edi=z4(=MULTIPLY(z4,-FIX_0_390180644))
+       imul    eax,(-F_0_899)  ; eax=z1(=MULTIPLY(z1,-FIX_0_899976223))
+       imul    ebx,(-F_2_562)  ; ebx=z2(=MULTIPLY(z2,-FIX_2_562915447))
+
+       add     edx,esi         ; edx=z3(=z3+z5)
+       add     edi,esi         ; edi=z4(=z4+z5)
+
+       lea     ecx,[eax+edx]   ; ecx=z1+z3
+       lea     esi,[ebx+edi]   ; esi=z2+z4
+       add     eax,edi         ; eax=z1+z4
+       add     ebx,edx         ; ebx=z2+z3
+
+       pop     edx             ; edx=tmp4
+       pop     edi             ; edi=tmp5
+       imul    edx,(F_0_298)   ; edx=tmp4(=MULTIPLY(tmp4,FIX_0_298631336))
+       imul    edi,(F_2_053)   ; edi=tmp5(=MULTIPLY(tmp5,FIX_2_053119869))
+       add     ecx,edx         ; ecx=data7(=tmp4+z1+z3)
+       add     esi,edi         ; esi=data5(=tmp5+z2+z4)
+       pop     edx             ; dataptr
+       descale ecx,(CONST_BITS+PASS1_BITS)
+       descale esi,(CONST_BITS+PASS1_BITS)
+       mov     DCTELEM [COL(7,edx,SIZEOF_DCTELEM)], cx
+       mov     DCTELEM [COL(5,edx,SIZEOF_DCTELEM)], si
+
+       pop     edi             ; edi=tmp6
+       pop     ecx             ; ecx=tmp7
+       imul    edi,(F_3_072)   ; edi=tmp6(=MULTIPLY(tmp6,FIX_3_072711026))
+       imul    ecx,(F_1_501)   ; ecx=tmp7(=MULTIPLY(tmp7,FIX_1_501321110))
+       add     ebx,edi         ; ebx=data3(=tmp6+z2+z3)
+       add     eax,ecx         ; eax=data1(=tmp7+z1+z4)
+       pop     ecx             ; ctr
+       descale ebx,(CONST_BITS+PASS1_BITS)
+       descale eax,(CONST_BITS+PASS1_BITS)
+       mov     DCTELEM [COL(3,edx,SIZEOF_DCTELEM)], bx
+       mov     DCTELEM [COL(1,edx,SIZEOF_DCTELEM)], ax
+
+       add     edx, byte SIZEOF_DCTELEM    ; advance pointer to next column
+       dec     ecx
+       jnz     near .columnloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; DCT_ISLOW_SUPPORTED
diff --git a/jfmmxfst.asm b/jfmmxfst.asm
new file mode 100644 (file)
index 0000000..2f8d53f
--- /dev/null
@@ -0,0 +1,404 @@
+;
+; jfmmxfst.asm - fast integer FDCT (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the forward DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
+; for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+%ifdef JFDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; Fixed-point multiplier constants.  FIX(x) denotes x scaled by
+; 2^CONST_BITS and rounded to the nearest integer.
+%define CONST_BITS     8       ; 14 is also OK.
+
+%if CONST_BITS == 8
+F_0_382        equ      98             ; FIX(0.382683433)
+F_0_541        equ     139             ; FIX(0.541196100)
+F_0_707        equ     181             ; FIX(0.707106781)
+F_1_306        equ     334             ; FIX(1.306562965)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; The integer literals below are the same constants scaled by 2^30;
+; DESCALE rounds them down to CONST_BITS fraction bits.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_382        equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707        equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306        equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+;
+; The code shifts each multiplicand left by PRE_MULTIPLY_SCALE_BITS before
+; pmulhw, and the table entries below are shifted left by CONST_SHIFT, so
+; the high word of the product kept by pmulhw is value * constant.
+
+%define PRE_MULTIPLY_SCALE_BITS   2
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
+
+       alignz  16
+       global  EXTN(jconst_fdct_ifast_mmx)
+
+EXTN(jconst_fdct_ifast_mmx):
+
+; Each constant is stored as four identical words (one 64-bit MMX operand).
+PW_F0707       times 4 dw  F_0_707 << CONST_SHIFT
+PW_F0382       times 4 dw  F_0_382 << CONST_SHIFT
+PW_F0541       times 4 dw  F_0_541 << CONST_SHIFT
+PW_F1306       times 4 dw  F_1_306 << CONST_SHIFT
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_ifast_mmx (DCTELEM * data)
+;
+
+%define data(b)                (b)+8           ; DCTELEM * data
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         2
+
+       align   16
+       global  EXTN(jpeg_fdct_ifast_mmx)
+
+EXTN(jpeg_fdct_ifast_mmx):
+       ; In-place forward DCT of the block addressed by the single stack
+       ; argument data, using MMX registers mm0-mm7 and two mmwords of
+       ; stack scratch space (wk(0), wk(1)).
+       ;
+       ; Frame setup: eax receives the stack pointer taken right after
+       ; "push ebp", so data(eax) addresses the argument; eax is left
+       ; untouched by both loops for that reason.  esp is then rounded down
+       ; to a multiple of SIZEOF_MMWORD, the unaligned value is stored at the
+       ; base of the aligned frame (so "pop esp" can restore it on exit), and
+       ; ebp is pointed at that aligned base so wk(i) can be addressed below it.
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; keep the unaligned value for the epilogue
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve WK_NUM mmwords below ebp
+       pushpic ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process rows.
+
+       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+       mov     ecx, DCTSIZE/4                  ; four rows are handled per iteration
+       alignx  16,7
+.rowloop:
+
+       movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+       movq    mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
+
+       ; mm0=(20 21 22 23), mm2=(24 25 26 27)
+       ; mm1=(30 31 32 33), mm3=(34 35 36 37)
+
+       movq      mm4,mm0               ; transpose coefficients(phase 1)
+       punpcklwd mm0,mm1               ; mm0=(20 30 21 31)
+       punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+       movq      mm5,mm2               ; transpose coefficients(phase 1)
+       punpcklwd mm2,mm3               ; mm2=(24 34 25 35)
+       punpckhwd mm5,mm3               ; mm5=(26 36 27 37)
+
+       movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+       movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
+
+       ; mm6=(00 01 02 03), mm1=(04 05 06 07)
+       ; mm7=(10 11 12 13), mm3=(14 15 16 17)
+
+       ; Spill two half-transposed words: all eight MMX registers are needed.
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 32 23 33)
+       movq    MMWORD [wk(1)], mm2     ; wk(1)=(24 34 25 35)
+
+       movq      mm4,mm6               ; transpose coefficients(phase 1)
+       punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+       punpckhwd mm4,mm7               ; mm4=(02 12 03 13)
+       movq      mm2,mm1               ; transpose coefficients(phase 1)
+       punpcklwd mm1,mm3               ; mm1=(04 14 05 15)
+       punpckhwd mm2,mm3               ; mm2=(06 16 07 17)
+
+       movq      mm7,mm6               ; transpose coefficients(phase 2)
+       punpckldq mm6,mm0               ; mm6=(00 10 20 30)=data0
+       punpckhdq mm7,mm0               ; mm7=(01 11 21 31)=data1
+       movq      mm3,mm2               ; transpose coefficients(phase 2)
+       punpckldq mm2,mm5               ; mm2=(06 16 26 36)=data6
+       punpckhdq mm3,mm5               ; mm3=(07 17 27 37)=data7
+
+       movq    mm0,mm7
+       movq    mm5,mm6
+       psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+       psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+       paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+       paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+       movq    mm2, MMWORD [wk(0)]     ; mm2=(22 32 23 33)
+       movq    mm3, MMWORD [wk(1)]     ; mm3=(24 34 25 35)
+       movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+       movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+       movq      mm7,mm4               ; transpose coefficients(phase 2)
+       punpckldq mm4,mm2               ; mm4=(02 12 22 32)=data2
+       punpckhdq mm7,mm2               ; mm7=(03 13 23 33)=data3
+       movq      mm6,mm1               ; transpose coefficients(phase 2)
+       punpckldq mm1,mm3               ; mm1=(04 14 24 34)=data4
+       punpckhdq mm6,mm3               ; mm6=(05 15 25 35)=data5
+
+       movq    mm2,mm7
+       movq    mm3,mm4
+       paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+       paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+       psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+       psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+       ; -- Even part
+
+       movq    mm1,mm5
+       movq    mm6,mm0
+       psubw   mm5,mm7                 ; mm5=tmp13
+       psubw   mm0,mm4                 ; mm0=tmp12
+       paddw   mm1,mm7                 ; mm1=tmp10
+       paddw   mm6,mm4                 ; mm6=tmp11
+
+       paddw   mm0,mm5                 ; mm0=tmp12+tmp13
+       psllw   mm0,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
+
+       movq    mm7,mm1
+       movq    mm4,mm5
+       psubw   mm1,mm6                 ; mm1=data4
+       psubw   mm5,mm0                 ; mm5=data6
+       paddw   mm7,mm6                 ; mm7=data0
+       paddw   mm4,mm0                 ; mm4=data2
+
+       movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1
+       movq    MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5
+       movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7
+       movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+
+       ; -- Odd part
+
+       movq    mm6, MMWORD [wk(0)]     ; mm6=tmp6
+       movq    mm0, MMWORD [wk(1)]     ; mm0=tmp7
+
+       paddw   mm2,mm3                 ; mm2=tmp10
+       paddw   mm3,mm6                 ; mm3=tmp11
+       paddw   mm6,mm0                 ; mm6=tmp12, mm0=tmp7
+
+       psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+       psllw   mm6,PRE_MULTIPLY_SCALE_BITS
+
+       psllw   mm3,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
+
+       movq    mm1,mm2                 ; mm1=tmp10
+       psubw   mm2,mm6                 ; mm2=tmp10-tmp12
+       pmulhw  mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
+       pmulhw  mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
+       pmulhw  mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
+       paddw   mm1,mm2                 ; mm1=z2
+       paddw   mm6,mm2                 ; mm6=z4
+
+       movq    mm5,mm0
+       psubw   mm0,mm3                 ; mm0=z13
+       paddw   mm5,mm3                 ; mm5=z11
+
+       movq    mm7,mm0
+       movq    mm4,mm5
+       psubw   mm0,mm1                 ; mm0=data3
+       psubw   mm5,mm6                 ; mm5=data7
+       paddw   mm7,mm1                 ; mm7=data5
+       paddw   mm4,mm6                 ; mm4=data1
+
+       movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0
+       movq    MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5
+       movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7
+       movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4
+
+       add     edx, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; advance pointer by four rows
+       dec     ecx
+       jnz     near .rowloop
+
+       ; ---- Pass 2: process columns.
+
+       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+       mov     ecx, DCTSIZE/4                  ; four columns are handled per iteration
+       alignx  16,7
+.columnloop:
+
+       movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+       movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+       ; mm0=(02 12 22 32), mm2=(42 52 62 72)
+       ; mm1=(03 13 23 33), mm3=(43 53 63 73)
+
+       movq      mm4,mm0               ; transpose coefficients(phase 1)
+       punpcklwd mm0,mm1               ; mm0=(02 03 12 13)
+       punpckhwd mm4,mm1               ; mm4=(22 23 32 33)
+       movq      mm5,mm2               ; transpose coefficients(phase 1)
+       punpcklwd mm2,mm3               ; mm2=(42 43 52 53)
+       punpckhwd mm5,mm3               ; mm5=(62 63 72 73)
+
+       movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+       movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+
+       ; mm6=(00 10 20 30), mm1=(40 50 60 70)
+       ; mm7=(01 11 21 31), mm3=(41 51 61 71)
+
+       ; Spill two half-transposed words: all eight MMX registers are needed.
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 23 32 33)
+       movq    MMWORD [wk(1)], mm2     ; wk(1)=(42 43 52 53)
+
+       movq      mm4,mm6               ; transpose coefficients(phase 1)
+       punpcklwd mm6,mm7               ; mm6=(00 01 10 11)
+       punpckhwd mm4,mm7               ; mm4=(20 21 30 31)
+       movq      mm2,mm1               ; transpose coefficients(phase 1)
+       punpcklwd mm1,mm3               ; mm1=(40 41 50 51)
+       punpckhwd mm2,mm3               ; mm2=(60 61 70 71)
+
+       movq      mm7,mm6               ; transpose coefficients(phase 2)
+       punpckldq mm6,mm0               ; mm6=(00 01 02 03)=data0
+       punpckhdq mm7,mm0               ; mm7=(10 11 12 13)=data1
+       movq      mm3,mm2               ; transpose coefficients(phase 2)
+       punpckldq mm2,mm5               ; mm2=(60 61 62 63)=data6
+       punpckhdq mm3,mm5               ; mm3=(70 71 72 73)=data7
+
+       movq    mm0,mm7
+       movq    mm5,mm6
+       psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+       psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+       paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+       paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+       movq    mm2, MMWORD [wk(0)]     ; mm2=(22 23 32 33)
+       movq    mm3, MMWORD [wk(1)]     ; mm3=(42 43 52 53)
+       movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+       movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+       movq      mm7,mm4               ; transpose coefficients(phase 2)
+       punpckldq mm4,mm2               ; mm4=(20 21 22 23)=data2
+       punpckhdq mm7,mm2               ; mm7=(30 31 32 33)=data3
+       movq      mm6,mm1               ; transpose coefficients(phase 2)
+       punpckldq mm1,mm3               ; mm1=(40 41 42 43)=data4
+       punpckhdq mm6,mm3               ; mm6=(50 51 52 53)=data5
+
+       movq    mm2,mm7
+       movq    mm3,mm4
+       paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+       paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+       psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+       psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+       ; -- Even part
+
+       movq    mm1,mm5
+       movq    mm6,mm0
+       psubw   mm5,mm7                 ; mm5=tmp13
+       psubw   mm0,mm4                 ; mm0=tmp12
+       paddw   mm1,mm7                 ; mm1=tmp10
+       paddw   mm6,mm4                 ; mm6=tmp11
+
+       paddw   mm0,mm5                 ; mm0=tmp12+tmp13
+       psllw   mm0,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
+
+       movq    mm7,mm1
+       movq    mm4,mm5
+       psubw   mm1,mm6                 ; mm1=data4
+       psubw   mm5,mm0                 ; mm5=data6
+       paddw   mm7,mm6                 ; mm7=data0
+       paddw   mm4,mm0                 ; mm4=data2
+
+       movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1
+       movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5
+       movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7
+       movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+
+       ; -- Odd part
+
+       movq    mm6, MMWORD [wk(0)]     ; mm6=tmp6
+       movq    mm0, MMWORD [wk(1)]     ; mm0=tmp7
+
+       paddw   mm2,mm3                 ; mm2=tmp10
+       paddw   mm3,mm6                 ; mm3=tmp11
+       paddw   mm6,mm0                 ; mm6=tmp12, mm0=tmp7
+
+       psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+       psllw   mm6,PRE_MULTIPLY_SCALE_BITS
+
+       psllw   mm3,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
+
+       movq    mm1,mm2                 ; mm1=tmp10
+       psubw   mm2,mm6                 ; mm2=tmp10-tmp12
+       pmulhw  mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
+       pmulhw  mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
+       pmulhw  mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
+       paddw   mm1,mm2                 ; mm1=z2
+       paddw   mm6,mm2                 ; mm6=z4
+
+       movq    mm5,mm0
+       psubw   mm0,mm3                 ; mm0=z13
+       paddw   mm5,mm3                 ; mm5=z11
+
+       movq    mm7,mm0
+       movq    mm4,mm5
+       psubw   mm0,mm1                 ; mm0=data3
+       psubw   mm5,mm6                 ; mm5=data7
+       paddw   mm7,mm1                 ; mm7=data5
+       paddw   mm4,mm6                 ; mm4=data1
+
+       movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0
+       movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5
+       movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7
+       movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4
+
+       add     edx, byte 4*SIZEOF_DCTELEM      ; advance pointer by four columns
+       dec     ecx
+       jnz     near .columnloop
+
+       emms            ; empty MMX state
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JFDCT_INT_MMX_SUPPORTED
+%endif ; DCT_IFAST_SUPPORTED
diff --git a/jfmmxint.asm b/jfmmxint.asm
new file mode 100644 (file)
index 0000000..afe47fd
--- /dev/null
@@ -0,0 +1,629 @@
+;
+; jfmmxint.asm - accurate integer FDCT (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctint.c; see jfdctint.c for
+; more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_ISLOW_SUPPORTED
+%ifdef JFDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     13      ; fractional bits of the F_x_xxx fixed-point constants
+%define PASS1_BITS     2       ; extra scale (left shift) carried by pass-1 outputs
+; Right-shift amounts applied (after rounding) to the 32-bit pmaddwd sums:
+%define DESCALE_P1     (CONST_BITS-PASS1_BITS)         ; used in pass 1 (rows)
+%define DESCALE_P2     (CONST_BITS+PASS1_BITS)         ; used in pass 2 (columns)
+
+%if CONST_BITS == 13
+F_0_298        equ      2446           ; FIX(0.298631336)
+F_0_390        equ      3196           ; FIX(0.390180644)
+F_0_541        equ      4433           ; FIX(0.541196100)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_175        equ      9633           ; FIX(1.175875602)
+F_1_501        equ     12299           ; FIX(1.501321110)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_1_961        equ     16069           ; FIX(1.961570560)
+F_2_053        equ     16819           ; FIX(2.053119869)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_072        equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so the values below are given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; add half of 2^n, then shift right by n
+F_0_298        equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390        equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175        equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501        equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961        equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053        equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072        equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_fdct_islow_mmx)
+
+EXTN(jconst_fdct_islow_mmx):
+; Each PW_Fxxx row is one 4-word pmaddwd operand: a coefficient pair (c0,c1) repeated twice.
+PW_F130_F054   times 2 dw  (F_0_541+F_0_765), F_0_541         ; (tmp13,tmp12) -> data2
+PW_F054_MF130  times 2 dw  F_0_541, (F_0_541-F_1_847)         ; (tmp13,tmp12) -> data6
+PW_MF078_F117  times 2 dw  (F_1_175-F_1_961), F_1_175         ; (z3,z4) -> z3
+PW_F117_F078   times 2 dw  F_1_175, (F_1_175-F_0_390)         ; (z3,z4) -> z4
+PW_MF060_MF089 times 2 dw  (F_0_298-F_0_899),-F_0_899         ; (tmp4,tmp7) -> tmp4
+PW_MF089_F060  times 2 dw -F_0_899, (F_1_501-F_0_899)         ; (tmp4,tmp7) -> tmp7
+PW_MF050_MF256 times 2 dw  (F_2_053-F_2_562),-F_2_562         ; (tmp5,tmp6) -> tmp5
+PW_MF256_F050  times 2 dw -F_2_562, (F_3_072-F_2_562)         ; (tmp5,tmp6) -> tmp6
+PD_DESCALE_P1  times 2 dd  1 << (DESCALE_P1-1)                ; dword rounding term added before psrad DESCALE_P1
+PD_DESCALE_P2  times 2 dd  1 << (DESCALE_P2-1)                ; dword rounding term added before psrad DESCALE_P2
+PW_DESCALE_P2X times 4 dw  1 << (PASS1_BITS-1)                ; word rounding term added before psraw PASS1_BITS
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+; The block at 'data' is transformed in place: pass 1 (rows), then pass 2 (columns).
+; GLOBAL(void)
+; jpeg_fdct_islow_mmx (DCTELEM * data)
+; Uses eax, ecx, edx and mm0-mm7 without preserving them; issues emms before returning.
+
+%define data(b)                (b)+8           ; DCTELEM * data
+
+%define original_ebp   ebp+0                   ; slot where the prologue stores eax
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         2                       ; number of mmword scratch slots below ebp
+
+       align   16
+       global  EXTN(jpeg_fdct_islow_mmx)
+
+EXTN(jpeg_fdct_islow_mmx):
+       push    ebp
+       mov     eax,esp                         ; eax = unaligned frame pointer ("original ebp"); data(eax) addresses the argument
+       sub     esp, byte 4                     ; make room so the aligned slot lies below the saved ebp
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; remember eax at [original_ebp] for the epilogue
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; esp = &wk[0]: reserve the wk[] scratch area
+       pushpic ebx                             ; NOTE(review): macro from jsimdext.inc; presumably saves ebx (paired with poppic)
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process rows.
+       ; Each iteration transforms 4 rows at once (one row per 16-bit lane).
+       mov     edx, POINTER [data(eax)]        ; edx = data (DCTELEM *)
+       mov     ecx, DCTSIZE/4                  ; ecx = number of 4-row groups
+       alignx  16,7
+.rowloop:
+
+       movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+       movq    mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
+
+       ; mm0=(20 21 22 23), mm2=(24 25 26 27)
+       ; mm1=(30 31 32 33), mm3=(34 35 36 37)
+
+       movq      mm4,mm0               ; transpose coefficients(phase 1)
+       punpcklwd mm0,mm1               ; mm0=(20 30 21 31)
+       punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+       movq      mm5,mm2               ; transpose coefficients(phase 1)
+       punpcklwd mm2,mm3               ; mm2=(24 34 25 35)
+       punpckhwd mm5,mm3               ; mm5=(26 36 27 37)
+
+       movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+       movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
+
+       ; mm6=(00 01 02 03), mm1=(04 05 06 07)
+       ; mm7=(10 11 12 13), mm3=(14 15 16 17)
+
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 32 23 33)
+       movq    MMWORD [wk(1)], mm2     ; wk(1)=(24 34 25 35)
+
+       movq      mm4,mm6               ; transpose coefficients(phase 1)
+       punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+       punpckhwd mm4,mm7               ; mm4=(02 12 03 13)
+       movq      mm2,mm1               ; transpose coefficients(phase 1)
+       punpcklwd mm1,mm3               ; mm1=(04 14 05 15)
+       punpckhwd mm2,mm3               ; mm2=(06 16 07 17)
+
+       movq      mm7,mm6               ; transpose coefficients(phase 2)
+       punpckldq mm6,mm0               ; mm6=(00 10 20 30)=data0
+       punpckhdq mm7,mm0               ; mm7=(01 11 21 31)=data1
+       movq      mm3,mm2               ; transpose coefficients(phase 2)
+       punpckldq mm2,mm5               ; mm2=(06 16 26 36)=data6
+       punpckhdq mm3,mm5               ; mm3=(07 17 27 37)=data7
+
+       movq    mm0,mm7
+       movq    mm5,mm6
+       psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+       psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+       paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+       paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+       movq    mm2, MMWORD [wk(0)]     ; mm2=(22 32 23 33)
+       movq    mm3, MMWORD [wk(1)]     ; mm3=(24 34 25 35)
+       movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+       movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+       movq      mm7,mm4               ; transpose coefficients(phase 2)
+       punpckldq mm4,mm2               ; mm4=(02 12 22 32)=data2
+       punpckhdq mm7,mm2               ; mm7=(03 13 23 33)=data3
+       movq      mm6,mm1               ; transpose coefficients(phase 2)
+       punpckldq mm1,mm3               ; mm1=(04 14 24 34)=data4
+       punpckhdq mm6,mm3               ; mm6=(05 15 25 35)=data5
+
+       movq    mm2,mm7
+       movq    mm3,mm4
+       paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+       paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+       psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+       psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+       ; -- Even part
+
+       movq    mm1,mm5                 ; mm1=tmp0
+       movq    mm6,mm0                 ; mm6=tmp1
+       paddw   mm5,mm7                 ; mm5=tmp10
+       paddw   mm0,mm4                 ; mm0=tmp11
+       psubw   mm1,mm7                 ; mm1=tmp13
+       psubw   mm6,mm4                 ; mm6=tmp12
+
+       movq    mm7,mm5                 ; mm7=tmp10
+       paddw   mm5,mm0                 ; mm5=tmp10+tmp11
+       psubw   mm7,mm0                 ; mm7=tmp10-tmp11
+
+       psllw   mm5,PASS1_BITS          ; mm5=data0
+       psllw   mm7,PASS1_BITS          ; mm7=data4
+       ; Results go back with one row per lane; pass 2 expects this layout (see its load comments).
+       movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
+       movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7
+
+       ; (Original)
+       ; z1 = (tmp12 + tmp13) * 0.541196100;
+       ; data2 = z1 + tmp13 * 0.765366865;
+       ; data6 = z1 + tmp12 * -1.847759065;
+       ;
+       ; (This implementation)
+       ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+       ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+       movq      mm4,mm1               ; mm1=tmp13
+       movq      mm0,mm1
+       punpcklwd mm4,mm6               ; mm6=tmp12
+       punpckhwd mm0,mm6               ; mm4/mm0 = interleaved (tmp13,tmp12) pairs, low/high halves
+       movq      mm1,mm4
+       movq      mm6,mm0
+       pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=data2L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]        ; mm0=data2H
+       pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=data6L
+       pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]       ; mm6=data6H
+       ; Descale with rounding: (x + (1 << (DESCALE_P1-1))) >> DESCALE_P1
+       paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   mm0,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   mm4,DESCALE_P1
+       psrad   mm0,DESCALE_P1
+       paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   mm6,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   mm1,DESCALE_P1
+       psrad   mm6,DESCALE_P1
+       ; Pack the low/high dword halves back into four words (signed saturation).
+       packssdw  mm4,mm0               ; mm4=data2
+       packssdw  mm1,mm6               ; mm1=data6
+
+       movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+       movq    MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1
+
+       ; -- Odd part
+
+       movq    mm5, MMWORD [wk(0)]     ; mm5=tmp6
+       movq    mm7, MMWORD [wk(1)]     ; mm7=tmp7
+
+       movq    mm0,mm2                 ; mm2=tmp4
+       movq    mm6,mm3                 ; mm3=tmp5
+       paddw   mm0,mm5                 ; mm0=z3
+       paddw   mm6,mm7                 ; mm6=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movq      mm4,mm0
+       movq      mm1,mm0
+       punpcklwd mm4,mm6
+       punpckhwd mm1,mm6               ; mm4/mm1 = interleaved (z3,z4) pairs, low/high halves
+       movq      mm0,mm4
+       movq      mm6,mm1
+       pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]       ; mm4=z3L
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]       ; mm1=z3H
+       pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]        ; mm0=z4L
+       pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]        ; mm6=z4H
+
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=z3L
+       movq    MMWORD [wk(1)], mm1     ; wk(1)=z3H
+
+       ; (Original)
+       ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+       ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+       ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+       ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+       ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+       ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+       ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+       ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+       ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+       movq      mm4,mm2
+       movq      mm1,mm2
+       punpcklwd mm4,mm7
+       punpckhwd mm1,mm7               ; mm4/mm1 = interleaved (tmp4,tmp7) pairs, low/high halves
+       movq      mm2,mm4
+       movq      mm7,mm1
+       pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm4=tmp4L
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm1=tmp4H
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]       ; mm2=tmp7L
+       pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]       ; mm7=tmp7H
+
+       paddd   mm4, MMWORD [wk(0)]     ; mm4=data7L
+       paddd   mm1, MMWORD [wk(1)]     ; mm1=data7H
+       paddd   mm2,mm0                 ; mm2=data1L
+       paddd   mm7,mm6                 ; mm7=data1H
+
+       paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   mm4,DESCALE_P1
+       psrad   mm1,DESCALE_P1
+       paddd   mm2,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   mm2,DESCALE_P1
+       psrad   mm7,DESCALE_P1
+
+       packssdw  mm4,mm1               ; mm4=data7
+       packssdw  mm2,mm7               ; mm2=data1
+
+       movq    MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4
+       movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
+
+       movq      mm1,mm3
+       movq      mm7,mm3
+       punpcklwd mm1,mm5
+       punpckhwd mm7,mm5               ; mm1/mm7 = interleaved (tmp5,tmp6) pairs, low/high halves
+       movq      mm3,mm1
+       movq      mm5,mm7
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm1=tmp5L
+       pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm7=tmp5H
+       pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]       ; mm3=tmp6L
+       pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]       ; mm5=tmp6H
+
+       paddd   mm1,mm0                 ; mm1=data5L
+       paddd   mm7,mm6                 ; mm7=data5H
+       paddd   mm3, MMWORD [wk(0)]     ; mm3=data3L
+       paddd   mm5, MMWORD [wk(1)]     ; mm5=data3H
+
+       paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   mm1,DESCALE_P1
+       psrad   mm7,DESCALE_P1
+       paddd   mm3,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   mm5,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   mm3,DESCALE_P1
+       psrad   mm5,DESCALE_P1
+
+       packssdw  mm1,mm7               ; mm1=data5
+       packssdw  mm3,mm5               ; mm3=data3
+
+       movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1
+       movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
+
+       add     edx, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; advance edx past the 4 rows just processed
+       dec     ecx
+       jnz     near .rowloop                           ; repeat DCTSIZE/4 times
+
+       ; ---- Pass 2: process columns.
+       ; Each iteration transforms 4 columns at once (one column per 16-bit lane).
+       mov     edx, POINTER [data(eax)]        ; edx = data (DCTELEM *), restart at the block origin
+       mov     ecx, DCTSIZE/4                  ; ecx = number of 4-column groups
+       alignx  16,7
+.columnloop:
+
+       movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+       movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+       ; mm0=(02 12 22 32), mm2=(42 52 62 72)
+       ; mm1=(03 13 23 33), mm3=(43 53 63 73)
+
+       movq      mm4,mm0               ; transpose coefficients(phase 1)
+       punpcklwd mm0,mm1               ; mm0=(02 03 12 13)
+       punpckhwd mm4,mm1               ; mm4=(22 23 32 33)
+       movq      mm5,mm2               ; transpose coefficients(phase 1)
+       punpcklwd mm2,mm3               ; mm2=(42 43 52 53)
+       punpckhwd mm5,mm3               ; mm5=(62 63 72 73)
+
+       movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+       movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+       movq    mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+       movq    mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+
+       ; mm6=(00 10 20 30), mm1=(40 50 60 70)
+       ; mm7=(01 11 21 31), mm3=(41 51 61 71)
+
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 23 32 33)
+       movq    MMWORD [wk(1)], mm2     ; wk(1)=(42 43 52 53)
+
+       movq      mm4,mm6               ; transpose coefficients(phase 1)
+       punpcklwd mm6,mm7               ; mm6=(00 01 10 11)
+       punpckhwd mm4,mm7               ; mm4=(20 21 30 31)
+       movq      mm2,mm1               ; transpose coefficients(phase 1)
+       punpcklwd mm1,mm3               ; mm1=(40 41 50 51)
+       punpckhwd mm2,mm3               ; mm2=(60 61 70 71)
+
+       movq      mm7,mm6               ; transpose coefficients(phase 2)
+       punpckldq mm6,mm0               ; mm6=(00 01 02 03)=data0
+       punpckhdq mm7,mm0               ; mm7=(10 11 12 13)=data1
+       movq      mm3,mm2               ; transpose coefficients(phase 2)
+       punpckldq mm2,mm5               ; mm2=(60 61 62 63)=data6
+       punpckhdq mm3,mm5               ; mm3=(70 71 72 73)=data7
+
+       movq    mm0,mm7
+       movq    mm5,mm6
+       psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+       psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+       paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+       paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+       movq    mm2, MMWORD [wk(0)]     ; mm2=(22 23 32 33)
+       movq    mm3, MMWORD [wk(1)]     ; mm3=(42 43 52 53)
+       movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+       movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+       movq      mm7,mm4               ; transpose coefficients(phase 2)
+       punpckldq mm4,mm2               ; mm4=(20 21 22 23)=data2
+       punpckhdq mm7,mm2               ; mm7=(30 31 32 33)=data3
+       movq      mm6,mm1               ; transpose coefficients(phase 2)
+       punpckldq mm1,mm3               ; mm1=(40 41 42 43)=data4
+       punpckhdq mm6,mm3               ; mm6=(50 51 52 53)=data5
+
+       movq    mm2,mm7
+       movq    mm3,mm4
+       paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+       paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+       psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+       psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+       ; -- Even part
+
+       movq    mm1,mm5                 ; mm1=tmp0
+       movq    mm6,mm0                 ; mm6=tmp1
+       paddw   mm5,mm7                 ; mm5=tmp10
+       paddw   mm0,mm4                 ; mm0=tmp11
+       psubw   mm1,mm7                 ; mm1=tmp13
+       psubw   mm6,mm4                 ; mm6=tmp12
+
+       movq    mm7,mm5                 ; mm7=tmp10
+       paddw   mm5,mm0                 ; mm5=tmp10+tmp11
+       psubw   mm7,mm0                 ; mm7=tmp10-tmp11
+       ; data0/data4 need no multiply: round, then shift out the PASS1_BITS scale in 16-bit arithmetic.
+       paddw   mm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
+       paddw   mm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
+       psraw   mm5,PASS1_BITS          ; mm5=data0
+       psraw   mm7,PASS1_BITS          ; mm7=data4
+
+       movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
+       movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7
+
+       ; (Original)
+       ; z1 = (tmp12 + tmp13) * 0.541196100;
+       ; data2 = z1 + tmp13 * 0.765366865;
+       ; data6 = z1 + tmp12 * -1.847759065;
+       ;
+       ; (This implementation)
+       ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+       ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+       movq      mm4,mm1               ; mm1=tmp13
+       movq      mm0,mm1
+       punpcklwd mm4,mm6               ; mm6=tmp12
+       punpckhwd mm0,mm6               ; mm4/mm0 = interleaved (tmp13,tmp12) pairs, low/high halves
+       movq      mm1,mm4
+       movq      mm6,mm0
+       pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=data2L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]        ; mm0=data2H
+       pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=data6L
+       pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]       ; mm6=data6H
+       ; Descale with rounding: (x + (1 << (DESCALE_P2-1))) >> DESCALE_P2
+       paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   mm0,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   mm4,DESCALE_P2
+       psrad   mm0,DESCALE_P2
+       paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   mm6,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   mm1,DESCALE_P2
+       psrad   mm6,DESCALE_P2
+       ; Pack the low/high dword halves back into four words (signed saturation).
+       packssdw  mm4,mm0               ; mm4=data2
+       packssdw  mm1,mm6               ; mm1=data6
+
+       movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+       movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1
+
+       ; -- Odd part
+
+       movq    mm5, MMWORD [wk(0)]     ; mm5=tmp6
+       movq    mm7, MMWORD [wk(1)]     ; mm7=tmp7
+
+       movq    mm0,mm2                 ; mm2=tmp4
+       movq    mm6,mm3                 ; mm3=tmp5
+       paddw   mm0,mm5                 ; mm0=z3
+       paddw   mm6,mm7                 ; mm6=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movq      mm4,mm0
+       movq      mm1,mm0
+       punpcklwd mm4,mm6
+       punpckhwd mm1,mm6               ; mm4/mm1 = interleaved (z3,z4) pairs, low/high halves
+       movq      mm0,mm4
+       movq      mm6,mm1
+       pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]       ; mm4=z3L
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]       ; mm1=z3H
+       pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]        ; mm0=z4L
+       pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]        ; mm6=z4H
+
+       movq    MMWORD [wk(0)], mm4     ; wk(0)=z3L
+       movq    MMWORD [wk(1)], mm1     ; wk(1)=z3H
+
+       ; (Original)
+       ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+       ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+       ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+       ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+       ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+       ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+       ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+       ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+       ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+       movq      mm4,mm2
+       movq      mm1,mm2
+       punpcklwd mm4,mm7
+       punpckhwd mm1,mm7               ; mm4/mm1 = interleaved (tmp4,tmp7) pairs, low/high halves
+       movq      mm2,mm4
+       movq      mm7,mm1
+       pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm4=tmp4L
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm1=tmp4H
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]       ; mm2=tmp7L
+       pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]       ; mm7=tmp7H
+
+       paddd   mm4, MMWORD [wk(0)]     ; mm4=data7L
+       paddd   mm1, MMWORD [wk(1)]     ; mm1=data7H
+       paddd   mm2,mm0                 ; mm2=data1L
+       paddd   mm7,mm6                 ; mm7=data1H
+
+       paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   mm4,DESCALE_P2
+       psrad   mm1,DESCALE_P2
+       paddd   mm2,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   mm2,DESCALE_P2
+       psrad   mm7,DESCALE_P2
+
+       packssdw  mm4,mm1               ; mm4=data7
+       packssdw  mm2,mm7               ; mm2=data1
+
+       movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4
+       movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
+
+       movq      mm1,mm3
+       movq      mm7,mm3
+       punpcklwd mm1,mm5
+       punpckhwd mm7,mm5               ; mm1/mm7 = interleaved (tmp5,tmp6) pairs, low/high halves
+       movq      mm3,mm1
+       movq      mm5,mm7
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm1=tmp5L
+       pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm7=tmp5H
+       pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]       ; mm3=tmp6L
+       pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]       ; mm5=tmp6H
+
+       paddd   mm1,mm0                 ; mm1=data5L
+       paddd   mm7,mm6                 ; mm7=data5H
+       paddd   mm3, MMWORD [wk(0)]     ; mm3=data3L
+       paddd   mm5, MMWORD [wk(1)]     ; mm5=data3H
+
+       paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   mm1,DESCALE_P2
+       psrad   mm7,DESCALE_P2
+       paddd   mm3,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   mm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   mm3,DESCALE_P2
+       psrad   mm5,DESCALE_P2
+
+       packssdw  mm1,mm7               ; mm1=data5
+       packssdw  mm3,mm5               ; mm3=data3
+
+       movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1
+       movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
+
+       add     edx, byte 4*SIZEOF_DCTELEM      ; advance edx by 4 elements (next 4 columns)
+       dec     ecx
+       jnz     near .columnloop                ; repeat DCTSIZE/4 times
+
+       emms            ; empty MMX state
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx             ; NOTE(review): macro from jsimdext.inc; presumably restores ebx (paired with pushpic)
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp (the value the prologue stored at [ebp+0])
+       pop     ebp             ; restore the caller's ebp pushed on entry
+       ret
+
+%endif ; JFDCT_INT_MMX_SUPPORTED
+%endif ; DCT_ISLOW_SUPPORTED
diff --git a/jfss2fst.asm b/jfss2fst.asm
new file mode 100644 (file)
index 0000000..567bcef
--- /dev/null
@@ -0,0 +1,411 @@
+;
+; jfss2fst.asm - fast integer FDCT (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the forward DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jfdctfst.c; see jfdctfst.c
+; for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+%ifdef JFDCT_INT_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; Fixed-point multiplier constants.
+; CONST_BITS is the number of fraction bits: F_x == round(x * 2^CONST_BITS).
+%define CONST_BITS     8       ; 14 is also OK.
+
+%if CONST_BITS == 8
+; Precomputed values for the default precision (x * 256, rounded).
+F_0_382        equ      98             ; FIX(0.382683433)
+F_0_541        equ     139             ; FIX(0.541196100)
+F_0_707        equ     181             ; FIX(0.707106781)
+F_1_306        equ     334             ; FIX(1.306562965)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; Each literal below is the multiplier pre-scaled by 2^30; DESCALE(x,n)
+; shifts it right by n bits with rounding (adds half an LSB first), which
+; leaves CONST_BITS fraction bits.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_382        equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707        equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306        equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+; Scaling scheme for the pmulhw-based multiplies:
+; the data operand is shifted left by PRE_MULTIPLY_SCALE_BITS and the
+; constant by CONST_SHIFT, so that the high word returned by pmulhw
+; (product >> 16) equals (value * F_x) >> CONST_BITS.
+;
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+
+%define PRE_MULTIPLY_SCALE_BITS   2
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
+
+       alignz  16              ; table is used as a 128-bit memory operand
+       global  EXTN(jconst_fdct_ifast_sse2)
+
+EXTN(jconst_fdct_ifast_sse2):
+
+; Each constant is replicated into all 8 word lanes of one xmmword.
+PW_F0707       times 8 dw  F_0_707 << CONST_SHIFT
+PW_F0382       times 8 dw  F_0_382 << CONST_SHIFT
+PW_F0541       times 8 dw  F_0_541 << CONST_SHIFT
+PW_F1306       times 8 dw  F_1_306 << CONST_SHIFT
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_ifast_sse2 (DCTELEM * data)
+;
+; The 8x8 block at 'data' is transformed in place: pass 1 transposes the
+; block and runs the 1-D transform, keeping its results in registers and
+; wk[]; pass 2 transposes again, runs the same 1-D transform and stores
+; the final coefficients back to 'data'.
+; All block accesses use movdqa, so 'data' is expected to be 16-byte
+; aligned.  eax, edx and xmm0-xmm7 are overwritten.
+;
+
+; 'b' is the address of the saved ebp (held in eax by the prologue), so
+; (b)+8 skips the saved ebp and the return address.
+%define data(b)                (b)+8           ; DCTELEM * data
+
+; wk[] is a scratch array of WK_NUM xmmwords placed directly below the
+; 16-byte-aligned frame pointer.
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         2
+
+       align   16
+       global  EXTN(jpeg_fdct_ifast_sse2)
+
+EXTN(jpeg_fdct_ifast_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax -> saved (original) ebp
+       sub     esp, byte 4                     ; room to store that pointer
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax                       ; remember it for the epilogue
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve wk[0..WK_NUM-1]
+       pushpic ebx             ; NOTE(review): macro from jsimdext.inc, presumably saves ebx for PIC builds
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process rows.
+
+       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+
+       ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+       ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+
+       movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+       punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+       punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+       movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+       punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+       punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
+
+       movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+       ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67)
+       ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77)
+
+       ; all eight xmm registers are live: spill two to wk[]
+       movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+       movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
+
+       movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+       punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+       punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+       movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+       punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+       punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
+
+       movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+       punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+       punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+       movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+       punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+       punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
+
+       movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+       movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+       movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(42 52 62 72 43 53 63 73)
+       movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=(44 54 64 74 45 55 65 75)
+
+       movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+       punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+       punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+       movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+       punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+       punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
+
+       movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+       punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+       punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+       movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+       punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+       punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
+
+       movdqa  xmm6,xmm1
+       movdqa  xmm3,xmm0
+       psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+       psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+       paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+       paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
+
+       movdqa  xmm2, XMMWORD [wk(0)]   ; xmm2=(42 52 62 72 43 53 63 73)
+       movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(44 54 64 74 45 55 65 75)
+       movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+       movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
+
+       movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+       punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+       punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+       movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+       punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+       punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
+
+       movdqa  xmm2,xmm1
+       movdqa  xmm5,xmm7
+       paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+       paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+       psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+       psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
+
+       ; -- Even part
+
+       movdqa  xmm4,xmm3
+       movdqa  xmm0,xmm6
+       psubw   xmm3,xmm1               ; xmm3=tmp13
+       psubw   xmm6,xmm7               ; xmm6=tmp12
+       paddw   xmm4,xmm1               ; xmm4=tmp10
+       paddw   xmm0,xmm7               ; xmm0=tmp11
+
+       ; Fixed-point multiply: pre-shift the data so that the high word
+       ; produced by pmulhw is (value * F_x) >> CONST_BITS.
+       paddw   xmm6,xmm3
+       psllw   xmm6,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1
+
+       movdqa  xmm1,xmm4
+       movdqa  xmm7,xmm3
+       psubw   xmm4,xmm0               ; xmm4=data4
+       psubw   xmm3,xmm6               ; xmm3=data6
+       paddw   xmm1,xmm0               ; xmm1=data0
+       paddw   xmm7,xmm6               ; xmm7=data2
+
+       movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=tmp6
+       movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp7
+       movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=data4
+       movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=data6
+
+       ; -- Odd part
+
+       paddw   xmm2,xmm5               ; xmm2=tmp10
+       paddw   xmm5,xmm0               ; xmm5=tmp11
+       paddw   xmm0,xmm6               ; xmm0=tmp12, xmm6=tmp7
+
+       psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+       psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+
+       psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3
+
+       movdqa  xmm4,xmm2               ; xmm4=tmp10
+       psubw   xmm2,xmm0
+       pmulhw  xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5
+       pmulhw  xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+       pmulhw  xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
+       paddw   xmm4,xmm2               ; xmm4=z2
+       paddw   xmm0,xmm2               ; xmm0=z4
+
+       movdqa  xmm3,xmm6
+       psubw   xmm6,xmm5               ; xmm6=z13
+       paddw   xmm3,xmm5               ; xmm3=z11
+
+       movdqa  xmm2,xmm6
+       movdqa  xmm5,xmm3
+       psubw   xmm6,xmm4               ; xmm6=data3
+       psubw   xmm3,xmm0               ; xmm3=data7
+       paddw   xmm2,xmm4               ; xmm2=data5
+       paddw   xmm5,xmm0               ; xmm5=data1
+
+       ; ---- Pass 2: process columns.
+
+       ; edx still holds the data pointer loaded in pass 1
+;      mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+       ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72)
+       ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73)
+
+       movdqa    xmm4,xmm1             ; transpose coefficients(phase 1)
+       punpcklwd xmm1,xmm5             ; xmm1=(00 01 10 11 20 21 30 31)
+       punpckhwd xmm4,xmm5             ; xmm4=(40 41 50 51 60 61 70 71)
+       movdqa    xmm0,xmm7             ; transpose coefficients(phase 1)
+       punpcklwd xmm7,xmm6             ; xmm7=(02 03 12 13 22 23 32 33)
+       punpckhwd xmm0,xmm6             ; xmm0=(42 43 52 53 62 63 72 73)
+
+       movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=col4
+       movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=col6
+
+       ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76)
+       ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77)
+
+       movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(02 03 12 13 22 23 32 33)
+       movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(42 43 52 53 62 63 72 73)
+
+       movdqa    xmm7,xmm5             ; transpose coefficients(phase 1)
+       punpcklwd xmm5,xmm2             ; xmm5=(04 05 14 15 24 25 34 35)
+       punpckhwd xmm7,xmm2             ; xmm7=(44 45 54 55 64 65 74 75)
+       movdqa    xmm0,xmm6             ; transpose coefficients(phase 1)
+       punpcklwd xmm6,xmm3             ; xmm6=(06 07 16 17 26 27 36 37)
+       punpckhwd xmm0,xmm3             ; xmm0=(46 47 56 57 66 67 76 77)
+
+       movdqa    xmm2,xmm5             ; transpose coefficients(phase 2)
+       punpckldq xmm5,xmm6             ; xmm5=(04 05 06 07 14 15 16 17)
+       punpckhdq xmm2,xmm6             ; xmm2=(24 25 26 27 34 35 36 37)
+       movdqa    xmm3,xmm7             ; transpose coefficients(phase 2)
+       punpckldq xmm7,xmm0             ; xmm7=(44 45 46 47 54 55 56 57)
+       punpckhdq xmm3,xmm0             ; xmm3=(64 65 66 67 74 75 76 77)
+
+       movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=(02 03 12 13 22 23 32 33)
+       movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(42 43 52 53 62 63 72 73)
+       movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(24 25 26 27 34 35 36 37)
+       movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(44 45 46 47 54 55 56 57)
+
+       movdqa    xmm2,xmm1             ; transpose coefficients(phase 2)
+       punpckldq xmm1,xmm6             ; xmm1=(00 01 02 03 10 11 12 13)
+       punpckhdq xmm2,xmm6             ; xmm2=(20 21 22 23 30 31 32 33)
+       movdqa    xmm7,xmm4             ; transpose coefficients(phase 2)
+       punpckldq xmm4,xmm0             ; xmm4=(40 41 42 43 50 51 52 53)
+       punpckhdq xmm7,xmm0             ; xmm7=(60 61 62 63 70 71 72 73)
+
+       movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+       punpcklqdq xmm1,xmm5            ; xmm1=(00 01 02 03 04 05 06 07)=data0
+       punpckhqdq xmm6,xmm5            ; xmm6=(10 11 12 13 14 15 16 17)=data1
+       movdqa     xmm0,xmm7            ; transpose coefficients(phase 3)
+       punpcklqdq xmm7,xmm3            ; xmm7=(60 61 62 63 64 65 66 67)=data6
+       punpckhqdq xmm0,xmm3            ; xmm0=(70 71 72 73 74 75 76 77)=data7
+
+       movdqa  xmm5,xmm6
+       movdqa  xmm3,xmm1
+       psubw   xmm6,xmm7               ; xmm6=data1-data6=tmp6
+       psubw   xmm1,xmm0               ; xmm1=data0-data7=tmp7
+       paddw   xmm5,xmm7               ; xmm5=data1+data6=tmp1
+       paddw   xmm3,xmm0               ; xmm3=data0+data7=tmp0
+
+       movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(24 25 26 27 34 35 36 37)
+       movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(44 45 46 47 54 55 56 57)
+       movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=tmp6
+       movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=tmp7
+
+       movdqa     xmm6,xmm2            ; transpose coefficients(phase 3)
+       punpcklqdq xmm2,xmm7            ; xmm2=(20 21 22 23 24 25 26 27)=data2
+       punpckhqdq xmm6,xmm7            ; xmm6=(30 31 32 33 34 35 36 37)=data3
+       movdqa     xmm1,xmm4            ; transpose coefficients(phase 3)
+       punpcklqdq xmm4,xmm0            ; xmm4=(40 41 42 43 44 45 46 47)=data4
+       punpckhqdq xmm1,xmm0            ; xmm1=(50 51 52 53 54 55 56 57)=data5
+
+       movdqa  xmm7,xmm6
+       movdqa  xmm0,xmm2
+       paddw   xmm6,xmm4               ; xmm6=data3+data4=tmp3
+       paddw   xmm2,xmm1               ; xmm2=data2+data5=tmp2
+       psubw   xmm7,xmm4               ; xmm7=data3-data4=tmp4
+       psubw   xmm0,xmm1               ; xmm0=data2-data5=tmp5
+
+       ; -- Even part
+
+       movdqa  xmm4,xmm3
+       movdqa  xmm1,xmm5
+       psubw   xmm3,xmm6               ; xmm3=tmp13
+       psubw   xmm5,xmm2               ; xmm5=tmp12
+       paddw   xmm4,xmm6               ; xmm4=tmp10
+       paddw   xmm1,xmm2               ; xmm1=tmp11
+
+       paddw   xmm5,xmm3
+       psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1
+
+       movdqa  xmm6,xmm4
+       movdqa  xmm2,xmm3
+       psubw   xmm4,xmm1               ; xmm4=data4
+       psubw   xmm3,xmm5               ; xmm3=data6
+       paddw   xmm6,xmm1               ; xmm6=data0
+       paddw   xmm2,xmm5               ; xmm2=data2
+
+       ; store the even-numbered output rows back to the block
+       movdqa  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4
+       movdqa  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3
+       movdqa  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6
+       movdqa  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2
+
+       ; -- Odd part
+
+       movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp6
+       movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
+
+       paddw   xmm7,xmm0               ; xmm7=tmp10
+       paddw   xmm0,xmm1               ; xmm0=tmp11
+       paddw   xmm1,xmm5               ; xmm1=tmp12, xmm5=tmp7
+
+       psllw   xmm7,PRE_MULTIPLY_SCALE_BITS
+       psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
+
+       psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3
+
+       movdqa  xmm4,xmm7               ; xmm4=tmp10
+       psubw   xmm7,xmm1
+       pmulhw  xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5
+       pmulhw  xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+       pmulhw  xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
+       paddw   xmm4,xmm7               ; xmm4=z2
+       paddw   xmm1,xmm7               ; xmm1=z4
+
+       movdqa  xmm3,xmm5
+       psubw   xmm5,xmm0               ; xmm5=z13
+       paddw   xmm3,xmm0               ; xmm3=z11
+
+       movdqa  xmm6,xmm5
+       movdqa  xmm2,xmm3
+       psubw   xmm5,xmm4               ; xmm5=data3
+       psubw   xmm3,xmm1               ; xmm3=data7
+       paddw   xmm6,xmm4               ; xmm6=data5
+       paddw   xmm2,xmm1               ; xmm2=data1
+
+       ; store the odd-numbered output rows back to the block
+       movdqa  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5
+       movdqa  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3
+       movdqa  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6
+       movdqa  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JFDCT_INT_SSE2_SUPPORTED
+%endif ; DCT_IFAST_SUPPORTED
diff --git a/jfss2int.asm b/jfss2int.asm
new file mode 100644 (file)
index 0000000..106b42c
--- /dev/null
@@ -0,0 +1,641 @@
+;
+; jfss2int.asm - accurate integer FDCT (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctint.c; see jfdctint.c for
+; more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_ISLOW_SUPPORTED
+%ifdef JFDCT_INT_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; Fixed-point parameters.
+; CONST_BITS is the number of fraction bits in the F_x multipliers:
+; F_x == round(x * 2^CONST_BITS).
+%define CONST_BITS     13
+%define PASS1_BITS     2
+
+; Right-shift counts applied (after adding a rounding constant) to the
+; 32-bit pmaddwd products.
+%define DESCALE_P1     (CONST_BITS-PASS1_BITS)
+%define DESCALE_P2     (CONST_BITS+PASS1_BITS)
+
+%if CONST_BITS == 13
+; Precomputed values for the default precision (x * 8192, rounded).
+F_0_298        equ      2446           ; FIX(0.298631336)
+F_0_390        equ      3196           ; FIX(0.390180644)
+F_0_541        equ      4433           ; FIX(0.541196100)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_175        equ      9633           ; FIX(1.175875602)
+F_1_501        equ     12299           ; FIX(1.501321110)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_1_961        equ     16069           ; FIX(1.961570560)
+F_2_053        equ     16819           ; FIX(2.053119869)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_072        equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; Each literal below is the multiplier pre-scaled by 2^30; DESCALE(x,n)
+; shifts it right by n bits with rounding (adds half an LSB first), which
+; leaves CONST_BITS fraction bits.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_298        equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390        equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175        equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501        equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961        equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053        equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072        equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16              ; table is used as 128-bit memory operands
+       global  EXTN(jconst_fdct_islow_sse2)
+
+EXTN(jconst_fdct_islow_sse2):
+
+; PW_* entries are (c0,c1) word pairs repeated four times.  They are used
+; with pmaddwd on operands built by punpcklwd/punpckhwd from two vectors
+; a and b, so each dword lane receives a*c0 + b*c1.  The name gives the
+; two combined multipliers: Fnnn = +n.nn, MFnnn = -n.nn.
+PW_F130_F054   times 4 dw  (F_0_541+F_0_765), F_0_541
+PW_F054_MF130  times 4 dw  F_0_541, (F_0_541-F_1_847)
+PW_MF078_F117  times 4 dw  (F_1_175-F_1_961), F_1_175
+PW_F117_F078   times 4 dw  F_1_175, (F_1_175-F_0_390)
+PW_MF060_MF089 times 4 dw  (F_0_298-F_0_899),-F_0_899
+PW_MF089_F060  times 4 dw -F_0_899, (F_1_501-F_0_899)
+PW_MF050_MF256 times 4 dw  (F_2_053-F_2_562),-F_2_562
+PW_MF256_F050  times 4 dw -F_2_562, (F_3_072-F_2_562)
+; Rounding constants (half of the LSB that the following right shift
+; discards): dword lanes for DESCALE_P1 / DESCALE_P2, word lanes for a
+; shift by PASS1_BITS.
+PD_DESCALE_P1  times 4 dd  1 << (DESCALE_P1-1)
+PD_DESCALE_P2  times 4 dd  1 << (DESCALE_P2-1)
+PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS-1)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_islow_sse2 (DCTELEM * data)
+;
+
+; 'b' is the address of the saved ebp (held in eax by the prologue), so
+; (b)+8 skips the saved ebp and the return address.
+%define data(b)                (b)+8           ; DCTELEM * data
+
+; wk[] is a scratch array of WK_NUM xmmwords placed directly below the
+; 16-byte-aligned frame pointer.
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         6
+
+       align   16
+       global  EXTN(jpeg_fdct_islow_sse2)
+
+EXTN(jpeg_fdct_islow_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]
+       pushpic ebx
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process rows.
+
+       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+
+       ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+       ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+
+       movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+       punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+       punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+       movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+       punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+       punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
+
+       movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+       ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62)
+       ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63)
+
+       movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+       movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
+
+       movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+       punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+       punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+       movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+       punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+       punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
+
+       movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+       punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+       punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+       movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+       punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+       punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
+
+       movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+       movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+       movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=(42 52 62 72 43 53 63 73)
+       movdqa  XMMWORD [wk(3)], xmm2   ; wk(3)=(44 54 64 74 45 55 65 75)
+
+       movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+       punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+       punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+       movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+       punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+       punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
+
+       movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+       punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+       punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+       movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+       punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+       punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
+
+       movdqa  xmm6,xmm1
+       movdqa  xmm3,xmm0
+       psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+       psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+       paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+       paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
+
+       movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(42 52 62 72 43 53 63 73)
+       movdqa  xmm5, XMMWORD [wk(3)]   ; xmm5=(44 54 64 74 45 55 65 75)
+       movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+       movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
+
+       movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+       punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+       punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+       movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+       punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+       punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
+
+       movdqa  xmm2,xmm1
+       movdqa  xmm5,xmm7
+       paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+       paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+       psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+       psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
+
+       ; -- Even part
+
+       movdqa  xmm4,xmm3
+       movdqa  xmm0,xmm6
+       paddw   xmm3,xmm1               ; xmm3=tmp10
+       paddw   xmm6,xmm7               ; xmm6=tmp11
+       psubw   xmm4,xmm1               ; xmm4=tmp13
+       psubw   xmm0,xmm7               ; xmm0=tmp12
+
+       movdqa  xmm1,xmm3
+       paddw   xmm3,xmm6               ; xmm3=tmp10+tmp11
+       psubw   xmm1,xmm6               ; xmm1=tmp10-tmp11
+
+       psllw   xmm3,PASS1_BITS         ; xmm3=data0
+       psllw   xmm1,PASS1_BITS         ; xmm1=data4
+
+       movdqa  XMMWORD [wk(2)], xmm3   ; wk(2)=data0
+       movdqa  XMMWORD [wk(3)], xmm1   ; wk(3)=data4
+
+       ; (Original)
+       ; z1 = (tmp12 + tmp13) * 0.541196100;
+       ; data2 = z1 + tmp13 * 0.765366865;
+       ; data6 = z1 + tmp12 * -1.847759065;
+       ;
+       ; (This implementation)
+       ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+       ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+       movdqa    xmm7,xmm4             ; xmm4=tmp13
+       movdqa    xmm6,xmm4
+       punpcklwd xmm7,xmm0             ; xmm0=tmp12
+       punpckhwd xmm6,xmm0
+       movdqa    xmm4,xmm7
+       movdqa    xmm0,xmm6
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_F130_F054)]       ; xmm7=data2L
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]       ; xmm6=data2H
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm4=data6L
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm0=data6H
+
+       paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   xmm6,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   xmm7,DESCALE_P1
+       psrad   xmm6,DESCALE_P1
+       paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   xmm0,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   xmm4,DESCALE_P1
+       psrad   xmm0,DESCALE_P1
+
+       packssdw  xmm7,xmm6             ; xmm7=data2
+       packssdw  xmm4,xmm0             ; xmm4=data6
+
+       movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=data2
+       movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=data6
+
+       ; -- Odd part
+
+       movdqa  xmm3, XMMWORD [wk(0)]   ; xmm3=tmp6
+       movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp7
+
+       movdqa  xmm6,xmm2               ; xmm2=tmp4
+       movdqa  xmm0,xmm5               ; xmm5=tmp5
+       paddw   xmm6,xmm3               ; xmm6=z3
+       paddw   xmm0,xmm1               ; xmm0=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movdqa    xmm7,xmm6
+       movdqa    xmm4,xmm6
+       punpcklwd xmm7,xmm0
+       punpckhwd xmm4,xmm0
+       movdqa    xmm6,xmm7
+       movdqa    xmm0,xmm4
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm7=z3L
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm4=z3H
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]       ; xmm6=z4L
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_F117_F078)]       ; xmm0=z4H
+
+       movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=z3L
+       movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=z3H
+
+       ; (Original)
+       ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+       ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+       ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+       ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+       ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+       ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+       ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+       ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+       ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+       movdqa    xmm7,xmm2
+       movdqa    xmm4,xmm2
+       punpcklwd xmm7,xmm1
+       punpckhwd xmm4,xmm1
+       movdqa    xmm2,xmm7
+       movdqa    xmm1,xmm4
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm7=tmp4L
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm4=tmp4H
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm2=tmp7L
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm1=tmp7H
+
+       paddd   xmm7, XMMWORD [wk(0)]   ; xmm7=data7L
+       paddd   xmm4, XMMWORD [wk(1)]   ; xmm4=data7H
+       paddd   xmm2,xmm6               ; xmm2=data1L
+       paddd   xmm1,xmm0               ; xmm1=data1H
+
+       paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   xmm7,DESCALE_P1
+       psrad   xmm4,DESCALE_P1
+       paddd   xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   xmm2,DESCALE_P1
+       psrad   xmm1,DESCALE_P1
+
+       packssdw  xmm7,xmm4             ; xmm7=data7
+       packssdw  xmm2,xmm1             ; xmm2=data1
+
+       movdqa    xmm4,xmm5
+       movdqa    xmm1,xmm5
+       punpcklwd xmm4,xmm3
+       punpckhwd xmm1,xmm3
+       movdqa    xmm5,xmm4
+       movdqa    xmm3,xmm1
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm4=tmp5L
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm1=tmp5H
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm5=tmp6L
+       pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm3=tmp6H
+
+       paddd   xmm4,xmm6               ; xmm4=data5L
+       paddd   xmm1,xmm0               ; xmm1=data5H
+       paddd   xmm5, XMMWORD [wk(0)]   ; xmm5=data3L
+       paddd   xmm3, XMMWORD [wk(1)]   ; xmm3=data3H
+
+       paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   xmm4,DESCALE_P1
+       psrad   xmm1,DESCALE_P1
+       paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P1)]
+       paddd   xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]
+       psrad   xmm5,DESCALE_P1
+       psrad   xmm3,DESCALE_P1
+
+       packssdw  xmm4,xmm1             ; xmm4=data5
+       packssdw  xmm5,xmm3             ; xmm5=data3
+
+       ; ---- Pass 2: process columns.
+
+;      mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+       movdqa  xmm6, XMMWORD [wk(2)]   ; xmm6=col0
+       movdqa  xmm0, XMMWORD [wk(4)]   ; xmm0=col2
+
+       ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72)
+       ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73)
+
+       movdqa    xmm1,xmm6             ; transpose coefficients(phase 1)
+       punpcklwd xmm6,xmm2             ; xmm6=(00 01 10 11 20 21 30 31)
+       punpckhwd xmm1,xmm2             ; xmm1=(40 41 50 51 60 61 70 71)
+       movdqa    xmm3,xmm0             ; transpose coefficients(phase 1)
+       punpcklwd xmm0,xmm5             ; xmm0=(02 03 12 13 22 23 32 33)
+       punpckhwd xmm3,xmm5             ; xmm3=(42 43 52 53 62 63 72 73)
+
+       movdqa  xmm2, XMMWORD [wk(3)]   ; xmm2=col4
+       movdqa  xmm5, XMMWORD [wk(5)]   ; xmm5=col6
+
+       ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76)
+       ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77)
+
+       movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=(02 03 12 13 22 23 32 33)
+       movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(42 43 52 53 62 63 72 73)
+
+       movdqa    xmm0,xmm2             ; transpose coefficients(phase 1)
+       punpcklwd xmm2,xmm4             ; xmm2=(04 05 14 15 24 25 34 35)
+       punpckhwd xmm0,xmm4             ; xmm0=(44 45 54 55 64 65 74 75)
+       movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+       punpcklwd xmm5,xmm7             ; xmm5=(06 07 16 17 26 27 36 37)
+       punpckhwd xmm3,xmm7             ; xmm3=(46 47 56 57 66 67 76 77)
+
+       movdqa    xmm4,xmm2             ; transpose coefficients(phase 2)
+       punpckldq xmm2,xmm5             ; xmm2=(04 05 06 07 14 15 16 17)
+       punpckhdq xmm4,xmm5             ; xmm4=(24 25 26 27 34 35 36 37)
+       movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+       punpckldq xmm0,xmm3             ; xmm0=(44 45 46 47 54 55 56 57)
+       punpckhdq xmm7,xmm3             ; xmm7=(64 65 66 67 74 75 76 77)
+
+       movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=(02 03 12 13 22 23 32 33)
+       movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53 62 63 72 73)
+       movdqa  XMMWORD [wk(2)], xmm4   ; wk(2)=(24 25 26 27 34 35 36 37)
+       movdqa  XMMWORD [wk(3)], xmm0   ; wk(3)=(44 45 46 47 54 55 56 57)
+
+       movdqa    xmm4,xmm6             ; transpose coefficients(phase 2)
+       punpckldq xmm6,xmm5             ; xmm6=(00 01 02 03 10 11 12 13)
+       punpckhdq xmm4,xmm5             ; xmm4=(20 21 22 23 30 31 32 33)
+       movdqa    xmm0,xmm1             ; transpose coefficients(phase 2)
+       punpckldq xmm1,xmm3             ; xmm1=(40 41 42 43 50 51 52 53)
+       punpckhdq xmm0,xmm3             ; xmm0=(60 61 62 63 70 71 72 73)
+
+       movdqa     xmm5,xmm6            ; transpose coefficients(phase 3)
+       punpcklqdq xmm6,xmm2            ; xmm6=(00 01 02 03 04 05 06 07)=data0
+       punpckhqdq xmm5,xmm2            ; xmm5=(10 11 12 13 14 15 16 17)=data1
+       movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+       punpcklqdq xmm0,xmm7            ; xmm0=(60 61 62 63 64 65 66 67)=data6
+       punpckhqdq xmm3,xmm7            ; xmm3=(70 71 72 73 74 75 76 77)=data7
+
+       movdqa  xmm2,xmm5
+       movdqa  xmm7,xmm6
+       psubw   xmm5,xmm0               ; xmm5=data1-data6=tmp6
+       psubw   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+       paddw   xmm2,xmm0               ; xmm2=data1+data6=tmp1
+       paddw   xmm7,xmm3               ; xmm7=data0+data7=tmp0
+
+       movdqa  xmm0, XMMWORD [wk(2)]   ; xmm0=(24 25 26 27 34 35 36 37)
+       movdqa  xmm3, XMMWORD [wk(3)]   ; xmm3=(44 45 46 47 54 55 56 57)
+       movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=tmp6
+       movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+       movdqa     xmm5,xmm4            ; transpose coefficients(phase 3)
+       punpcklqdq xmm4,xmm0            ; xmm4=(20 21 22 23 24 25 26 27)=data2
+       punpckhqdq xmm5,xmm0            ; xmm5=(30 31 32 33 34 35 36 37)=data3
+       movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+       punpcklqdq xmm1,xmm3            ; xmm1=(40 41 42 43 44 45 46 47)=data4
+       punpckhqdq xmm6,xmm3            ; xmm6=(50 51 52 53 54 55 56 57)=data5
+
+       movdqa  xmm0,xmm5
+       movdqa  xmm3,xmm4
+       paddw   xmm5,xmm1               ; xmm5=data3+data4=tmp3
+       paddw   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+       psubw   xmm0,xmm1               ; xmm0=data3-data4=tmp4
+       psubw   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+       ; -- Even part
+
+       movdqa  xmm1,xmm7
+       movdqa  xmm6,xmm2
+       paddw   xmm7,xmm5               ; xmm7=tmp10
+       paddw   xmm2,xmm4               ; xmm2=tmp11
+       psubw   xmm1,xmm5               ; xmm1=tmp13
+       psubw   xmm6,xmm4               ; xmm6=tmp12
+
+       movdqa  xmm5,xmm7
+       paddw   xmm7,xmm2               ; xmm7=tmp10+tmp11
+       psubw   xmm5,xmm2               ; xmm5=tmp10-tmp11
+
+       paddw   xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
+       paddw   xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
+       psraw   xmm7,PASS1_BITS         ; xmm7=data0
+       psraw   xmm5,PASS1_BITS         ; xmm5=data4
+
+       movdqa  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7
+       movdqa  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5
+
+       ; (Original)
+       ; z1 = (tmp12 + tmp13) * 0.541196100;
+       ; data2 = z1 + tmp13 * 0.765366865;
+       ; data6 = z1 + tmp12 * -1.847759065;
+       ;
+       ; (This implementation)
+       ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+       ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+       movdqa    xmm4,xmm1             ; xmm1=tmp13
+       movdqa    xmm2,xmm1
+       punpcklwd xmm4,xmm6             ; xmm6=tmp12
+       punpckhwd xmm2,xmm6
+       movdqa    xmm1,xmm4
+       movdqa    xmm6,xmm2
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]       ; xmm4=data2L
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_F130_F054)]       ; xmm2=data2H
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=data6L
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm6=data6H
+
+       paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   xmm4,DESCALE_P2
+       psrad   xmm2,DESCALE_P2
+       paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   xmm6,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   xmm1,DESCALE_P2
+       psrad   xmm6,DESCALE_P2
+
+       packssdw  xmm4,xmm2             ; xmm4=data2
+       packssdw  xmm1,xmm6             ; xmm1=data6
+
+       movdqa  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4
+       movdqa  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1
+
+       ; -- Odd part
+
+       movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp6
+       movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
+
+       movdqa  xmm2,xmm0               ; xmm0=tmp4
+       movdqa  xmm6,xmm3               ; xmm3=tmp5
+       paddw   xmm2,xmm7               ; xmm2=z3
+       paddw   xmm6,xmm5               ; xmm6=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movdqa    xmm4,xmm2
+       movdqa    xmm1,xmm2
+       punpcklwd xmm4,xmm6
+       punpckhwd xmm1,xmm6
+       movdqa    xmm2,xmm4
+       movdqa    xmm6,xmm1
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm4=z3L
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm1=z3H
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_F117_F078)]       ; xmm2=z4L
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]       ; xmm6=z4H
+
+       movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=z3L
+       movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=z3H
+
+       ; (Original)
+       ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+       ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+       ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+       ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+       ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+       ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+       ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+       ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+       ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+       movdqa    xmm4,xmm0
+       movdqa    xmm1,xmm0
+       punpcklwd xmm4,xmm5
+       punpckhwd xmm1,xmm5
+       movdqa    xmm0,xmm4
+       movdqa    xmm5,xmm1
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm4=tmp4L
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm1=tmp4H
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm0=tmp7L
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm5=tmp7H
+
+       paddd   xmm4, XMMWORD [wk(0)]   ; xmm4=data7L
+       paddd   xmm1, XMMWORD [wk(1)]   ; xmm1=data7H
+       paddd   xmm0,xmm2               ; xmm0=data1L
+       paddd   xmm5,xmm6               ; xmm5=data1H
+
+       paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   xmm4,DESCALE_P2
+       psrad   xmm1,DESCALE_P2
+       paddd   xmm0,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   xmm0,DESCALE_P2
+       psrad   xmm5,DESCALE_P2
+
+       packssdw  xmm4,xmm1             ; xmm4=data7
+       packssdw  xmm0,xmm5             ; xmm0=data1
+
+       movdqa  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4
+       movdqa  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0
+
+       movdqa    xmm1,xmm3
+       movdqa    xmm5,xmm3
+       punpcklwd xmm1,xmm7
+       punpckhwd xmm5,xmm7
+       movdqa    xmm3,xmm1
+       movdqa    xmm7,xmm5
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm1=tmp5L
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm5=tmp5H
+       pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm3=tmp6L
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm7=tmp6H
+
+       paddd   xmm1,xmm2               ; xmm1=data5L
+       paddd   xmm5,xmm6               ; xmm5=data5H
+       paddd   xmm3, XMMWORD [wk(0)]   ; xmm3=data3L
+       paddd   xmm7, XMMWORD [wk(1)]   ; xmm7=data3H
+
+       paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   xmm1,DESCALE_P2
+       psrad   xmm5,DESCALE_P2
+       paddd   xmm3,[GOTOFF(ebx,PD_DESCALE_P2)]
+       paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+       psrad   xmm3,DESCALE_P2
+       psrad   xmm7,DESCALE_P2
+
+       packssdw  xmm1,xmm5             ; xmm1=data5
+       packssdw  xmm3,xmm7             ; xmm3=data3
+
+       movdqa  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1
+       movdqa  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JFDCT_INT_SSE2_SUPPORTED
+%endif ; DCT_ISLOW_SUPPORTED
diff --git a/jfsseflt.asm b/jfsseflt.asm
new file mode 100644 (file)
index 0000000..98b0973
--- /dev/null
@@ -0,0 +1,383 @@
+;
+; jfsseflt.asm - floating-point FDCT (SSE)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_SSE_MMX_SUPPORTED
+%define JFDCT_FLT_SSE_SUPPORTED
+%endif
+%ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED
+%define JFDCT_FLT_SSE_SUPPORTED
+%endif
+%ifdef JFDCT_FLT_SSE_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%macro unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+       shufps  %1,%2,0x44      ; selector 0x44: elements 0,1 of %1, then elements 0,1 of %2
+%endmacro
+
+%macro unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+       shufps  %1,%2,0xEE      ; selector 0xEE: elements 2,3 of %1, then elements 2,3 of %2
+%endmacro
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_fdct_float_sse)
+
+EXTN(jconst_fdct_float_sse):   ; each constant is repeated 4 times (16 bytes = one XMM operand)
+
+PD_0_382       times 4 dd  0.382683432365089771728460     ; cos(3*pi/8)
+PD_0_707       times 4 dd  0.707106781186547524400844     ; cos(pi/4) = 1/sqrt(2)
+PD_0_541       times 4 dd  0.541196100146196984399723     ; cos(pi/8) - cos(3*pi/8)
+PD_1_306       times 4 dd  1.306562964876376527856643     ; cos(pi/8) + cos(3*pi/8)
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform the forward DCT, in place, on one 8x8 block of samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_float_sse (FAST_FLOAT * data)
+; NOTE: data is accessed with movaps, so it must be 16-byte aligned.
+
+%define data(b)                (b)+8           ; FAST_FLOAT * data
+
+%define original_ebp   ebp+0   ; slot at the aligned ebp that holds the saved original ebp
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         2       ; two XMMWORD scratch slots: wk(0) and wk(1)
+
+       align   16
+       global  EXTN(jpeg_fdct_float_sse)
+
+EXTN(jpeg_fdct_float_sse):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp (arguments are read via data(eax))
+       sub     esp, byte 4                     ; make room for the saved original ebp
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax                       ; save original ebp; reloaded by 'pop esp' in the epilogue
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve wk[WK_NUM] just below the aligned ebp
+       pushpic ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process rows.
+
+       mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE/4                  ; loop counter: DCTSIZE/4 iterations
+       alignx  16,7
+.rowloop:
+
+       movaps  xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)]
+
+       ; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
+       ; xmm1=(30 31 32 33), xmm3=(34 35 36 37)
+
+       movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+       unpcklps xmm0,xmm1              ; xmm0=(20 30 21 31)
+       unpckhps xmm4,xmm1              ; xmm4=(22 32 23 33)
+       movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+       unpcklps xmm2,xmm3              ; xmm2=(24 34 25 35)
+       unpckhps xmm5,xmm3              ; xmm5=(26 36 27 37)
+
+       movaps  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+       ; xmm6=(00 01 02 03), xmm1=(04 05 06 07)
+       ; xmm7=(10 11 12 13), xmm3=(14 15 16 17)
+
+       movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 32 23 33)
+       movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(24 34 25 35)
+
+       movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+       unpcklps xmm6,xmm7              ; xmm6=(00 10 01 11)
+       unpckhps xmm4,xmm7              ; xmm4=(02 12 03 13)
+       movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+       unpcklps xmm1,xmm3              ; xmm1=(04 14 05 15)
+       unpckhps xmm2,xmm3              ; xmm2=(06 16 07 17)
+
+       movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+       unpcklps2 xmm6,xmm0             ; xmm6=(00 10 20 30)=data0
+       unpckhps2 xmm7,xmm0             ; xmm7=(01 11 21 31)=data1
+       movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+       unpcklps2 xmm2,xmm5             ; xmm2=(06 16 26 36)=data6
+       unpckhps2 xmm3,xmm5             ; xmm3=(07 17 27 37)=data7
+
+       movaps  xmm0,xmm7
+       movaps  xmm5,xmm6
+       subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+       subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+       addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+       addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
+
+       movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 32 23 33)
+       movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(24 34 25 35)
+       movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+       movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+       movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+       unpcklps2 xmm4,xmm2             ; xmm4=(02 12 22 32)=data2
+       unpckhps2 xmm7,xmm2             ; xmm7=(03 13 23 33)=data3
+       movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+       unpcklps2 xmm1,xmm3             ; xmm1=(04 14 24 34)=data4
+       unpckhps2 xmm6,xmm3             ; xmm6=(05 15 25 35)=data5
+
+       movaps  xmm2,xmm7
+       movaps  xmm3,xmm4
+       addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+       addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+       subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+       subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+       ; -- Even part
+
+       movaps  xmm1,xmm5
+       movaps  xmm6,xmm0
+       subps   xmm5,xmm7               ; xmm5=tmp13
+       subps   xmm0,xmm4               ; xmm0=tmp12
+       addps   xmm1,xmm7               ; xmm1=tmp10
+       addps   xmm6,xmm4               ; xmm6=tmp11
+
+       addps   xmm0,xmm5               ; xmm0=tmp12+tmp13
+       mulps   xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
+
+       movaps  xmm7,xmm1
+       movaps  xmm4,xmm5
+       subps   xmm1,xmm6               ; xmm1=data4
+       subps   xmm5,xmm0               ; xmm5=data6
+       addps   xmm7,xmm6               ; xmm7=data0
+       addps   xmm4,xmm0               ; xmm4=data2
+
+       movaps  XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1 ; store data4
+       movaps  XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5 ; store data6
+       movaps  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 ; store data0
+       movaps  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 ; store data2
+
+       ; -- Odd part
+
+       movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+       movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
+
+       addps   xmm2,xmm3               ; xmm2=tmp10
+       addps   xmm3,xmm6               ; xmm3=tmp11
+       addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
+
+       mulps   xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
+
+       movaps  xmm1,xmm2               ; xmm1=tmp10
+       subps   xmm2,xmm6               ; xmm2=tmp10-tmp12
+       mulps   xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
+       mulps   xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+       mulps   xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+       addps   xmm1,xmm2               ; xmm1=z2
+       addps   xmm6,xmm2               ; xmm6=z4
+
+       movaps  xmm5,xmm0
+       subps   xmm0,xmm3               ; xmm0=z13
+       addps   xmm5,xmm3               ; xmm5=z11
+
+       movaps  xmm7,xmm0
+       movaps  xmm4,xmm5
+       subps   xmm0,xmm1               ; xmm0=data3
+       subps   xmm5,xmm6               ; xmm5=data7
+       addps   xmm7,xmm1               ; xmm7=data5
+       addps   xmm4,xmm6               ; xmm4=data1
+
+       movaps  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 ; store data3
+       movaps  XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5 ; store data7
+       movaps  XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7 ; store data5
+       movaps  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 ; store data1
+
+       add     edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT        ; advance edx by 4 rows of DCTSIZE floats
+       dec     ecx
+       jnz     near .rowloop
+
+       ; ---- Pass 2: process columns.
+
+       mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+       mov     ecx, DCTSIZE/4                  ; loop counter: DCTSIZE/4 iterations
+       alignx  16,7
+.columnloop:
+
+       movaps  xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
+
+       ; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
+       ; xmm1=(03 13 23 33), xmm3=(43 53 63 73)
+
+       movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+       unpcklps xmm0,xmm1              ; xmm0=(02 03 12 13)
+       unpckhps xmm4,xmm1              ; xmm4=(22 23 32 33)
+       movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+       unpcklps xmm2,xmm3              ; xmm2=(42 43 52 53)
+       unpckhps xmm5,xmm3              ; xmm5=(62 63 72 73)
+
+       movaps  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
+
+       ; xmm6=(00 10 20 30), xmm1=(40 50 60 70)
+       ; xmm7=(01 11 21 31), xmm3=(41 51 61 71)
+
+       movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 23 32 33)
+       movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(42 43 52 53)
+
+       movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+       unpcklps xmm6,xmm7              ; xmm6=(00 01 10 11)
+       unpckhps xmm4,xmm7              ; xmm4=(20 21 30 31)
+       movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+       unpcklps xmm1,xmm3              ; xmm1=(40 41 50 51)
+       unpckhps xmm2,xmm3              ; xmm2=(60 61 70 71)
+
+       movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+       unpcklps2 xmm6,xmm0             ; xmm6=(00 01 02 03)=data0
+       unpckhps2 xmm7,xmm0             ; xmm7=(10 11 12 13)=data1
+       movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+       unpcklps2 xmm2,xmm5             ; xmm2=(60 61 62 63)=data6
+       unpckhps2 xmm3,xmm5             ; xmm3=(70 71 72 73)=data7
+
+       movaps  xmm0,xmm7
+       movaps  xmm5,xmm6
+       subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+       subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+       addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+       addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
+
+       movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 23 32 33)
+       movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53)
+       movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+       movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+       movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+       unpcklps2 xmm4,xmm2             ; xmm4=(20 21 22 23)=data2
+       unpckhps2 xmm7,xmm2             ; xmm7=(30 31 32 33)=data3
+       movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+       unpcklps2 xmm1,xmm3             ; xmm1=(40 41 42 43)=data4
+       unpckhps2 xmm6,xmm3             ; xmm6=(50 51 52 53)=data5
+
+       movaps  xmm2,xmm7
+       movaps  xmm3,xmm4
+       addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+       addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+       subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+       subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+       ; -- Even part
+
+       movaps  xmm1,xmm5
+       movaps  xmm6,xmm0
+       subps   xmm5,xmm7               ; xmm5=tmp13
+       subps   xmm0,xmm4               ; xmm0=tmp12
+       addps   xmm1,xmm7               ; xmm1=tmp10
+       addps   xmm6,xmm4               ; xmm6=tmp11
+
+       addps   xmm0,xmm5               ; xmm0=tmp12+tmp13
+       mulps   xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
+
+       movaps  xmm7,xmm1
+       movaps  xmm4,xmm5
+       subps   xmm1,xmm6               ; xmm1=data4
+       subps   xmm5,xmm0               ; xmm5=data6
+       addps   xmm7,xmm6               ; xmm7=data0
+       addps   xmm4,xmm0               ; xmm4=data2
+
+       movaps  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1 ; store data4
+       movaps  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5 ; store data6
+       movaps  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 ; store data0
+       movaps  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 ; store data2
+
+       ; -- Odd part
+
+       movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+       movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
+
+       addps   xmm2,xmm3               ; xmm2=tmp10
+       addps   xmm3,xmm6               ; xmm3=tmp11
+       addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
+
+       mulps   xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
+
+       movaps  xmm1,xmm2               ; xmm1=tmp10
+       subps   xmm2,xmm6               ; xmm2=tmp10-tmp12
+       mulps   xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
+       mulps   xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+       mulps   xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+       addps   xmm1,xmm2               ; xmm1=z2
+       addps   xmm6,xmm2               ; xmm6=z4
+
+       movaps  xmm5,xmm0
+       subps   xmm0,xmm3               ; xmm0=z13
+       addps   xmm5,xmm3               ; xmm5=z11
+
+       movaps  xmm7,xmm0
+       movaps  xmm4,xmm5
+       subps   xmm0,xmm1               ; xmm0=data3
+       subps   xmm5,xmm6               ; xmm5=data7
+       addps   xmm7,xmm1               ; xmm7=data5
+       addps   xmm4,xmm6               ; xmm4=data1
+
+       movaps  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 ; store data3
+       movaps  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5 ; store data7
+       movaps  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7 ; store data5
+       movaps  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 ; store data1
+
+       add     edx, byte 4*SIZEOF_FAST_FLOAT   ; advance edx by 4 floats (next group of 4 columns)
+       dec     ecx
+       jnz     near .columnloop
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JFDCT_FLT_SSE_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/ji3dnflt.asm b/ji3dnflt.asm
new file mode 100644 (file)
index 0000000..9c31e99
--- /dev/null
@@ -0,0 +1,462 @@
+;
+; ji3dnflt.asm - floating-point IDCT (3DNow! & MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see jidctflt.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_idct_float_3dnow)
+
+EXTN(jconst_idct_float_3dnow):         ; each entry below is 8 bytes (one mmword)
+
+PD_1_414       times 2 dd  1.414213562373095048801689  ; 2*cos(PI*1/4)
+PD_1_847       times 2 dd  1.847759065022573512256366  ; 2*cos(PI*1/8)
+PD_1_082       times 2 dd  1.082392200292393968799446  ; 2*(cos(PI*1/8)-cos(PI*3/8))
+PD_2_613       times 2 dd  2.613125929752753055713286  ; 2*(cos(PI*1/8)+cos(PI*3/8))
+PD_RNDINT_MAGIC        times 2 dd  100663296.0 ; (float)(0x00C00000 << 3)
+PB_CENTERJSAMP times 8 db  CENTERJSAMPLE       ; per-byte bias added after packsswb
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_float_3dnow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                        JCOEFPTR coef_block,
+;                        JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define original_ebp   ebp+0           ; slot where the prologue stores the unaligned frame base
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         2               ; number of mmword scratch slots in wk[]
+%define workspace      wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                       ; FAST_FLOAT workspace[DCTSIZE2], located just below wk[]
+
+       align   16
+       global  EXTN(jpeg_idct_float_3dnow)
+
+EXTN(jpeg_idct_float_3dnow):           ; dequantize + 8x8 float IDCT (3DNow!/MMX)
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4                     ; room for the saved frame base
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; [original_ebp] <- eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [workspace]                ; allocate wk[] and workspace[]
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+;      mov     eax, [original_ebp]     ; omitted: eax assumed still valid (get_GOT must not clobber it)
+       mov     edx, POINTER [compptr(eax)]             ; edx = compptr
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+       lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     ecx, DCTSIZE/2                          ; ctr (2 columns per pass)
+       alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT        ; rows 1-2 nonzero -> full IDCT
+
+       pushpic ebx             ; save GOT address
+       mov     ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       mov     eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       or      ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       or      ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       or      eax,ebx
+       poppic  ebx             ; restore GOT address
+       jnz     short .columnDCT        ; rows 3-7 nonzero -> full IDCT
+
+       ; -- AC terms all zero
+
+       movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]    ; DC terms of both columns
+
+       punpcklwd mm0,mm0                       ; duplicate each word into a dword
+       psrad     mm0,(DWORD_BIT-WORD_BIT)      ; arithmetic shift: sign-extended dwords
+       pi2fd     mm0,mm0                       ; int32 -> float
+
+       pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize
+
+       movq      mm1,mm0
+       punpckldq mm0,mm0                       ; mm0=(dc0 dc0)
+       punpckhdq mm1,mm1                       ; mm1=(dc1 dc1)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0        ; 8 x dc0
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0
+       movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0
+       movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1        ; 8 x dc1
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1
+       movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
+       movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
+       jmp     near .nextcolumn
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Even part
+
+       movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]    ; rows 0,2,4,6 of both columns
+       movd      mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movd      mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movd      mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+       punpcklwd mm0,mm0
+       punpcklwd mm1,mm1
+       psrad     mm0,(DWORD_BIT-WORD_BIT)      ; sign-extend words to dwords
+       psrad     mm1,(DWORD_BIT-WORD_BIT)
+       pi2fd     mm0,mm0                       ; int32 -> float
+       pi2fd     mm1,mm1
+
+       pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize
+       pfmul     mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       punpcklwd mm2,mm2
+       punpcklwd mm3,mm3
+       psrad     mm2,(DWORD_BIT-WORD_BIT)
+       psrad     mm3,(DWORD_BIT-WORD_BIT)
+       pi2fd     mm2,mm2
+       pi2fd     mm3,mm3
+
+       pfmul     mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       pfmul     mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movq    mm4,mm0
+       movq    mm5,mm1
+       pfsub   mm0,mm2                 ; mm0=tmp11
+       pfsub   mm1,mm3                 ; mm1=in2-in6
+       pfadd   mm4,mm2                 ; mm4=tmp10
+       pfadd   mm5,mm3                 ; mm5=tmp13
+
+       pfmul   mm1,[GOTOFF(ebx,PD_1_414)]      ; mm1=(in2-in6)*1.414213562
+       pfsub   mm1,mm5                 ; mm1=tmp12
+
+       movq    mm6,mm4
+       movq    mm7,mm0
+       pfsub   mm4,mm5                 ; mm4=tmp3
+       pfsub   mm0,mm1                 ; mm0=tmp2
+       pfadd   mm6,mm5                 ; mm6=tmp0
+       pfadd   mm7,mm1                 ; mm7=tmp1
+
+       movq    MMWORD [wk(1)], mm4     ; tmp3
+       movq    MMWORD [wk(0)], mm0     ; tmp2
+
+       ; -- Odd part
+
+       movd      mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]    ; rows 1,3,5,7 of both columns
+       movd      mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       movd      mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movd      mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+       punpcklwd mm2,mm2
+       punpcklwd mm3,mm3
+       psrad     mm2,(DWORD_BIT-WORD_BIT)
+       psrad     mm3,(DWORD_BIT-WORD_BIT)
+       pi2fd     mm2,mm2
+       pi2fd     mm3,mm3
+
+       pfmul     mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       pfmul     mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       punpcklwd mm5,mm5
+       punpcklwd mm1,mm1
+       psrad     mm5,(DWORD_BIT-WORD_BIT)
+       psrad     mm1,(DWORD_BIT-WORD_BIT)
+       pi2fd     mm5,mm5
+       pi2fd     mm1,mm1
+
+       pfmul     mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       pfmul     mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movq    mm4,mm2
+       movq    mm0,mm5
+       pfadd   mm2,mm1                 ; mm2=z11
+       pfadd   mm5,mm3                 ; mm5=z13
+       pfsub   mm4,mm1                 ; mm4=z12
+       pfsub   mm0,mm3                 ; mm0=z10
+
+       movq    mm1,mm2
+       pfsub   mm2,mm5                 ; mm2=z11-z13
+       pfadd   mm1,mm5                 ; mm1=tmp7
+
+       pfmul   mm2,[GOTOFF(ebx,PD_1_414)]      ; mm2=tmp11
+
+       movq    mm3,mm0
+       pfadd   mm0,mm4                 ; mm0=z10+z12
+       pfmul   mm0,[GOTOFF(ebx,PD_1_847)]      ; mm0=z5
+       pfmul   mm3,[GOTOFF(ebx,PD_2_613)]      ; mm3=(z10 * 2.613125930)
+       pfmul   mm4,[GOTOFF(ebx,PD_1_082)]      ; mm4=(z12 * 1.082392200)
+       pfsubr  mm3,mm0                 ; mm3=tmp12
+       pfsub   mm4,mm0                 ; mm4=tmp10
+
+       ; -- Final output stage
+
+       pfsub   mm3,mm1                 ; mm3=tmp6
+       movq    mm5,mm6
+       movq    mm0,mm7
+       pfadd   mm6,mm1                 ; mm6=data0=(00 01)
+       pfadd   mm7,mm3                 ; mm7=data1=(10 11)
+       pfsub   mm5,mm1                 ; mm5=data7=(70 71)
+       pfsub   mm0,mm3                 ; mm0=data6=(60 61)
+       pfsub   mm2,mm3                 ; mm2=tmp5
+
+       movq      mm1,mm6               ; transpose coefficients
+       punpckldq mm6,mm7               ; mm6=(00 10)
+       punpckhdq mm1,mm7               ; mm1=(01 11)
+       movq      mm3,mm0               ; transpose coefficients
+       punpckldq mm0,mm5               ; mm0=(60 70)
+       punpckhdq mm3,mm5               ; mm3=(61 71)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6        ; store transposed pairs
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
+       movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+       movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3
+
+       movq    mm7, MMWORD [wk(0)]     ; mm7=tmp2
+       movq    mm5, MMWORD [wk(1)]     ; mm5=tmp3
+
+       pfadd   mm4,mm2                 ; mm4=tmp4
+       movq    mm6,mm7
+       movq    mm1,mm5
+       pfadd   mm7,mm2                 ; mm7=data2=(20 21)
+       pfadd   mm5,mm4                 ; mm5=data4=(40 41)
+       pfsub   mm6,mm2                 ; mm6=data5=(50 51)
+       pfsub   mm1,mm4                 ; mm1=data3=(30 31)
+
+       movq      mm0,mm7               ; transpose coefficients
+       punpckldq mm7,mm1               ; mm7=(20 30)
+       punpckhdq mm0,mm1               ; mm0=(21 31)
+       movq      mm3,mm5               ; transpose coefficients
+       punpckldq mm5,mm6               ; mm5=(40 50)
+       punpckhdq mm3,mm6               ; mm3=(41 51)
+
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0
+       movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
+       movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3
+
+.nextcolumn:
+       add     esi, byte 2*SIZEOF_JCOEF                ; coef_block
+       add     edx, byte 2*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+       add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+       dec     ecx                                     ; ctr
+       jnz     near .columnloop
+
+       ; -- Prefetch the next coefficient block
+
+       prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]       ; esi has advanced 8 coefs in pass 1
+       prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+       prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+       prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, [original_ebp]                     ; eax = unaligned frame base (for args)
+       lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]       ; eax = output_col
+       mov     ecx, DCTSIZE/2                          ; ctr (2 rows per pass)
+       alignx  16,7
+.rowloop:
+
+       ; -- Even part
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+       movq    mm4,mm0
+       movq    mm5,mm1
+       pfsub   mm0,mm2                 ; mm0=tmp11
+       pfsub   mm1,mm3                 ; mm1=in2-in6
+       pfadd   mm4,mm2                 ; mm4=tmp10
+       pfadd   mm5,mm3                 ; mm5=tmp13
+
+       pfmul   mm1,[GOTOFF(ebx,PD_1_414)]      ; mm1=(in2-in6)*1.414213562
+       pfsub   mm1,mm5                 ; mm1=tmp12
+
+       movq    mm6,mm4
+       movq    mm7,mm0
+       pfsub   mm4,mm5                 ; mm4=tmp3
+       pfsub   mm0,mm1                 ; mm0=tmp2
+       pfadd   mm6,mm5                 ; mm6=tmp0
+       pfadd   mm7,mm1                 ; mm7=tmp1
+
+       movq    MMWORD [wk(1)], mm4     ; tmp3
+       movq    MMWORD [wk(0)], mm0     ; tmp2
+
+       ; -- Odd part
+
+       movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+       movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+       movq    mm4,mm2
+       movq    mm0,mm5
+       pfadd   mm2,mm1                 ; mm2=z11
+       pfadd   mm5,mm3                 ; mm5=z13
+       pfsub   mm4,mm1                 ; mm4=z12
+       pfsub   mm0,mm3                 ; mm0=z10
+
+       movq    mm1,mm2
+       pfsub   mm2,mm5                 ; mm2=z11-z13
+       pfadd   mm1,mm5                 ; mm1=tmp7
+
+       pfmul   mm2,[GOTOFF(ebx,PD_1_414)]      ; mm2=tmp11
+
+       movq    mm3,mm0
+       pfadd   mm0,mm4                 ; mm0=z10+z12
+       pfmul   mm0,[GOTOFF(ebx,PD_1_847)]      ; mm0=z5
+       pfmul   mm3,[GOTOFF(ebx,PD_2_613)]      ; mm3=(z10 * 2.613125930)
+       pfmul   mm4,[GOTOFF(ebx,PD_1_082)]      ; mm4=(z12 * 1.082392200)
+       pfsubr  mm3,mm0                 ; mm3=tmp12
+       pfsub   mm4,mm0                 ; mm4=tmp10
+
+       ; -- Final output stage
+
+       pfsub   mm3,mm1                 ; mm3=tmp6
+       movq    mm5,mm6
+       movq    mm0,mm7
+       pfadd   mm6,mm1                 ; mm6=data0=(00 10)
+       pfadd   mm7,mm3                 ; mm7=data1=(01 11)
+       pfsub   mm5,mm1                 ; mm5=data7=(07 17)
+       pfsub   mm0,mm3                 ; mm0=data6=(06 16)
+       pfsub   mm2,mm3                 ; mm2=tmp5
+
+       movq    mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]       ; mm1=[PD_RNDINT_MAGIC]
+       pcmpeqd mm3,mm3
+       psrld   mm3,WORD_BIT            ; mm3={0xFFFF 0x0000 0xFFFF 0x0000}
+
+       pfadd   mm6,mm1                 ; mm6=roundint(data0/8)=(00 ** 10 **)
+       pfadd   mm7,mm1                 ; mm7=roundint(data1/8)=(01 ** 11 **)
+       pfadd   mm0,mm1                 ; mm0=roundint(data6/8)=(06 ** 16 **)
+       pfadd   mm5,mm1                 ; mm5=roundint(data7/8)=(07 ** 17 **)
+
+       pand    mm6,mm3                 ; mm6=(00 -- 10 --)
+       pslld   mm7,WORD_BIT            ; mm7=(-- 01 -- 11)
+       pand    mm0,mm3                 ; mm0=(06 -- 16 --)
+       pslld   mm5,WORD_BIT            ; mm5=(-- 07 -- 17)
+       por     mm6,mm7                 ; mm6=(00 01 10 11)
+       por     mm0,mm5                 ; mm0=(06 07 16 17)
+
+       movq    mm1, MMWORD [wk(0)]     ; mm1=tmp2
+       movq    mm3, MMWORD [wk(1)]     ; mm3=tmp3
+
+       pfadd   mm4,mm2                 ; mm4=tmp4
+       movq    mm7,mm1
+       movq    mm5,mm3
+       pfadd   mm1,mm2                 ; mm1=data2=(02 12)
+       pfadd   mm3,mm4                 ; mm3=data4=(04 14)
+       pfsub   mm7,mm2                 ; mm7=data5=(05 15)
+       pfsub   mm5,mm4                 ; mm5=data3=(03 13)
+
+       movq    mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]       ; mm2=[PD_RNDINT_MAGIC]
+       pcmpeqd mm4,mm4
+       psrld   mm4,WORD_BIT            ; mm4={0xFFFF 0x0000 0xFFFF 0x0000}
+
+       pfadd   mm3,mm2                 ; mm3=roundint(data4/8)=(04 ** 14 **)
+       pfadd   mm7,mm2                 ; mm7=roundint(data5/8)=(05 ** 15 **)
+       pfadd   mm1,mm2                 ; mm1=roundint(data2/8)=(02 ** 12 **)
+       pfadd   mm5,mm2                 ; mm5=roundint(data3/8)=(03 ** 13 **)
+
+       pand    mm3,mm4                 ; mm3=(04 -- 14 --)
+       pslld   mm7,WORD_BIT            ; mm7=(-- 05 -- 15)
+       pand    mm1,mm4                 ; mm1=(02 -- 12 --)
+       pslld   mm5,WORD_BIT            ; mm5=(-- 03 -- 13)
+       por     mm3,mm7                 ; mm3=(04 05 14 15)
+       por     mm1,mm5                 ; mm1=(02 03 12 13)
+
+       movq      mm2,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm2=[PB_CENTERJSAMP]
+
+       packsswb  mm6,mm3               ; mm6=(00 01 10 11 04 05 14 15)
+       packsswb  mm1,mm0               ; mm1=(02 03 12 13 06 07 16 17)
+       paddb     mm6,mm2               ; bias every byte by CENTERJSAMPLE
+       paddb     mm1,mm2
+
+       movq      mm4,mm6               ; transpose coefficients(phase 2)
+       punpcklwd mm6,mm1               ; mm6=(00 01 02 03 10 11 12 13)
+       punpckhwd mm4,mm1               ; mm4=(04 05 06 07 14 15 16 17)
+
+       movq      mm7,mm6               ; transpose coefficients(phase 3)
+       punpckldq mm6,mm4               ; mm6=(00 01 02 03 04 05 06 07)
+       punpckhdq mm7,mm4               ; mm7=(10 11 12 13 14 15 16 17)
+
+       pushpic ebx                     ; save GOT address
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; edx = first output row pointer
+       mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; ebx = second output row pointer
+       movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6    ; 8 samples at output_col
+       movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
+
+       poppic  ebx                     ; restore GOT address
+
+       add     esi, byte 2*SIZEOF_FAST_FLOAT   ; wsptr
+       add     edi, byte 2*SIZEOF_JSAMPROW     ; next pair of output row pointers
+       dec     ecx                             ; ctr
+       jnz     near .rowloop
+
+       femms           ; empty MMX/3DNow! state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JIDCT_FLT_3DNOW_MMX_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jidctflt.asm b/jidctflt.asm
new file mode 100644 (file)
index 0000000..126dc7b
--- /dev/null
@@ -0,0 +1,473 @@
+;
+; jidctflt.asm - floating-point IDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see jidctflt.c for more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+%define ROTATOR_TYPE   FP32    ; float (operand size used when reading the F_* constants)
+
+       alignz  16
+       global  EXTN(jconst_idct_float)
+
+EXTN(jconst_idct_float):       ; scalar rotation constants used by the x87 IDCT below
+
+F_1_414        dd      1.414213562373095048801689      ; 2*cos(PI*1/4)
+F_1_847        dd      1.847759065022573512256366      ; 2*cos(PI*1/8)
+F_1_082        dd      1.082392200292393968799446      ; 2*(cos(PI*1/8)-cos(PI*3/8))
+F_2_613        dd      2.613125929752753055713286      ; 2*(cos(PI*1/8)+cos(PI*3/8))
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                  JCOEFPTR coef_block,
+;                  JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define tmp            ebp-SIZEOF_FP64 ; double tmp (x87 spill slot)
+%define workspace      tmp-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                       ; FAST_FLOAT workspace[DCTSIZE2]
+%define rndint_magic   workspace-SIZEOF_FP32
+                                       ; float rndint_magic = 100663296.0F (pushed by the prologue)
+%define gotptr         rndint_magic-SIZEOF_POINTER     ; void * gotptr (saved GOT address)
+
+       align   16
+       global  EXTN(jpeg_idct_float)
+
+EXTN(jpeg_idct_float):         ; dequantize + 8x8 float IDCT (scalar x87)
+       push    ebp
+       mov     ebp,esp
+       lea     esp, [workspace]        ; allocate tmp and workspace[]
+       push    FP32 0x4CC00000         ; [rndint_magic] = (float)(0x00C00000 << 3)
+       pushpic eax                     ; make a room for GOT address
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx                     ; get GOT address
+       movpic  POINTER [gotptr], ebx   ; save GOT address
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+       mov     edx, POINTER [compptr(ebp)]             ; edx = compptr
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+       lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.columnloop:
+       mov     ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT        ; rows 1-2 nonzero -> full IDCT
+
+       mov     bx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]     ; bx clobbered; ebx is reloaded at .columnDCT
+       mov     ax, JCOEF [COL(4,esi,SIZEOF_JCOEF)]
+       or      bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       or      bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       or      ax,bx
+       jnz     short .columnDCT        ; rows 3-7 nonzero -> full IDCT
+
+       ; -- AC terms all zero
+
+       fild    JCOEF [COL(0,esi,SIZEOF_JCOEF)]         ; st0 = DC coefficient
+       fmul    FLOAT_MULT_TYPE [COL(0,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; dequantize
+
+       fst     FAST_FLOAT [COL(0,edi,SIZEOF_FAST_FLOAT)]       ; replicate into rows 0..7
+       fst     FAST_FLOAT [COL(1,edi,SIZEOF_FAST_FLOAT)]
+       fst     FAST_FLOAT [COL(2,edi,SIZEOF_FAST_FLOAT)]
+       fst     FAST_FLOAT [COL(3,edi,SIZEOF_FAST_FLOAT)]
+       fst     FAST_FLOAT [COL(4,edi,SIZEOF_FAST_FLOAT)]
+       fst     FAST_FLOAT [COL(5,edi,SIZEOF_FAST_FLOAT)]
+       fst     FAST_FLOAT [COL(6,edi,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(7,edi,SIZEOF_FAST_FLOAT)]       ; last store also pops st0
+       jmp     near .nextcolumn
+       alignx  16,7
+
+.columnDCT:
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       ; -- Even part
+
+       fild    JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       fild    JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       fild    JCOEF [COL(4,esi,SIZEOF_JCOEF)]
+       fild    JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+
+       fxch    st0,st3
+
+       fmul    FLOAT_MULT_TYPE [COL(2,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in2, dequantized
+       fxch    st0,st2
+       fmul    FLOAT_MULT_TYPE [COL(6,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in6, dequantized
+       fxch    st0,st1
+       fmul    FLOAT_MULT_TYPE [COL(4,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in4, dequantized
+       fxch    st0,st3
+       fmul    FLOAT_MULT_TYPE [COL(0,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in0, dequantized
+       fxch    st0,st1         ; now st0=in6 st1=in0 st2=in2 st3=in4
+
+       fld     st2     ; st2 = st2 + st0, st0 = st2 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st3,st0
+
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_414)]
+
+       fld     st3     ; st1 = st1 + st3, st3 = st1 - st3
+       fsubr   st0,st2
+       fxch    st0,st4
+       faddp   st2,st0
+
+       fsub    st0,st2
+
+       fld     st1     ; st2 = st1 + st2, st1 = st1 - st2
+       fsub    st0,st3
+       fxch    st0,st2
+       faddp   st3,st0
+       fld     st3     ; st0 = st3 + st0, st3 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st4
+       faddp   st1,st0
+
+       ; -- Odd part
+
+       fild    JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       fild    JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       fild    JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       fild    JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+
+       fxch    st0,st3
+
+       fmul    FLOAT_MULT_TYPE [COL(1,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in1, dequantized
+       fxch    st0,st2
+       fmul    FLOAT_MULT_TYPE [COL(7,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in7, dequantized
+       fxch    st0,st1
+       fmul    FLOAT_MULT_TYPE [COL(3,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in3, dequantized
+       fxch    st0,st6
+       fxch    st3,st0
+       fmul    FLOAT_MULT_TYPE [COL(5,edx,SIZEOF_FLOAT_MULT_TYPE)]     ; st0=in5, dequantized
+       fxch    st0,st5
+       fstp    FP64 [tmp]      ; spill one even-part result to free an x87 slot
+
+       fld     st1     ; st1 = st1 + st0, st0 = st1 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st2,st0
+       fld     st5     ; st4 = st4 + st5, st5 = st4 - st5
+       fsubr   st0,st5
+       fxch    st0,st6
+       faddp   st5,st0
+
+       fld     st1     ; st1 = st1 + st4, st4 = st1 - st4
+       fsub    st0,st5
+       fxch    st0,st5
+       faddp   st2,st0
+
+       fld     st5
+       fadd    st0,st1
+       fxch    st0,st5
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_414)]
+       fxch    st0,st5
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_847)]
+       fxch    st0,st6
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_2_613)]
+       fxch    st0,st1
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_082)]
+       fxch    st0,st6
+       fsubr   st1,st0
+       fsubp   st6,st0
+
+       ; -- Final output stage
+
+       fsub    st0,st1
+       fld     st2     ; st1 = st2 + st1, st2 = st2 - st1
+       fsub    st0,st2
+       fxch    st0,st3
+       faddp   st2,st0
+       fsub    st4,st0
+       fld     st3     ; st0 = st3 + st0, st3 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st4
+       faddp   st1,st0
+
+       fxch    st0,st2
+
+       fstp    FAST_FLOAT [COL(7,edi,SIZEOF_FAST_FLOAT)]       ; store output rows 7,0,1,6
+       fstp    FAST_FLOAT [COL(0,edi,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(1,edi,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(6,edi,SIZEOF_FAST_FLOAT)]
+
+       fadd    st1,st0
+       fld     FP64 [tmp]      ; reload the spilled even-part result
+       fld     st1     ; st3 = st3 + st1, st1 = st3 - st1
+       fsubr   st0,st4
+       fxch    st0,st2
+       faddp   st4,st0
+       fld     st0     ; st0 = st0 + st2, st2 = st0 - st2
+       fsub    st0,st3
+       fxch    st0,st3
+       faddp   st1,st0
+
+       fxch    st0,st3
+
+       fstp    FAST_FLOAT [COL(2,edi,SIZEOF_FAST_FLOAT)]       ; store output rows 2,5,3,4
+       fstp    FAST_FLOAT [COL(5,edi,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(3,edi,SIZEOF_FAST_FLOAT)]
+       fstp    FAST_FLOAT [COL(4,edi,SIZEOF_FAST_FLOAT)]
+
+.nextcolumn:
+       add     esi, byte SIZEOF_JCOEF  ; advance pointers to next column
+       add     edx, byte SIZEOF_FLOAT_MULT_TYPE
+       add     edi, byte SIZEOF_FAST_FLOAT
+       dec     ecx
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     edx, POINTER [cinfo(ebp)]               ; edx = cinfo
+       mov     edx, POINTER [jdstruct_sample_range_limit(edx)]
+       sub     edx, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit (table + CENTERJSAMPLE)
+
+       lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.rowloop:
+       push    edi                                     ; save (JSAMPROW *); restored at .nextrow
+       mov     edi, JSAMPROW [edi]                     ; (JSAMPLE *)
+       add     edi, JDIMENSION [output_col(ebp)]       ; edi=outptr
+
+%ifndef NO_ZERO_ROW_TEST_FLOAT
+       mov     eax, FAST_FLOAT [ROW(1,esi,SIZEOF_FAST_FLOAT)]
+       add     eax,eax                 ; shl eax,1 (shift out the sign bit)
+       jnz     short .rowDCT
+
+       mov     eax, FAST_FLOAT [ROW(2,esi,SIZEOF_FAST_FLOAT)]
+       mov     ebx, FAST_FLOAT [ROW(3,esi,SIZEOF_FAST_FLOAT)] ; ebx clobbered; reloaded at .rowDCT
+       or      eax, FAST_FLOAT [ROW(4,esi,SIZEOF_FAST_FLOAT)]
+       or      ebx, FAST_FLOAT [ROW(5,esi,SIZEOF_FAST_FLOAT)]
+       or      eax, FAST_FLOAT [ROW(6,esi,SIZEOF_FAST_FLOAT)]
+       or      ebx, FAST_FLOAT [ROW(7,esi,SIZEOF_FAST_FLOAT)]
+       or      eax,ebx
+       add     eax,eax                 ; shl eax,1 (shift out the sign bit)
+       jnz     short .rowDCT
+
+       ; -- AC terms all zero
+
+       push    eax                     ; scratch dword on the stack
+
+       fld     FAST_FLOAT [ROW(0,esi,SIZEOF_FAST_FLOAT)]
+       fadd    FP32 [rndint_magic]     ; add the rounding magic to the DC term
+       fstp    FP32 [esp]              ; store as float into the scratch dword
+
+       pop     eax                     ; eax = raw bits of the biased float
+       and     eax,RANGE_MASK          ; keep the bits used as table index
+       mov     al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]    ; range-limit via table lookup
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al      ; same sample in all 8 positions
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+2*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+3*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+4*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+5*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+6*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+7*SIZEOF_JSAMPLE], al
+       jmp     near .nextrow
+       alignx  16,7
+%endif
+.rowDCT:
+       movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+       ; -- Even part
+
+       fld     FAST_FLOAT [ROW(4,esi,SIZEOF_FAST_FLOAT)]
+       fld     FAST_FLOAT [ROW(2,esi,SIZEOF_FAST_FLOAT)]
+       fld     FAST_FLOAT [ROW(0,esi,SIZEOF_FAST_FLOAT)]
+       fld     FAST_FLOAT [ROW(6,esi,SIZEOF_FAST_FLOAT)]       ; now st0=in6 st1=in0 st2=in2 st3=in4
+
+       fld     st2     ; st2 = st2 + st0, st0 = st2 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st3,st0
+
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_414)]
+
+       fld     st3     ; st1 = st1 + st3, st3 = st1 - st3
+       fsubr   st0,st2
+       fxch    st0,st4
+       faddp   st2,st0
+
+       fsub    st0,st2
+
+       fld     st1     ; st2 = st1 + st2, st1 = st1 - st2
+       fsub    st0,st3
+       fxch    st0,st2
+       faddp   st3,st0
+       fld     st3     ; st0 = st3 + st0, st3 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st4
+       faddp   st1,st0
+
+       ; -- Odd part
+
+       fld     FAST_FLOAT [ROW(3,esi,SIZEOF_FAST_FLOAT)]
+       fxch    st0,st3
+       fld     FAST_FLOAT [ROW(1,esi,SIZEOF_FAST_FLOAT)]
+       fld     FAST_FLOAT [ROW(7,esi,SIZEOF_FAST_FLOAT)]
+       fld     FAST_FLOAT [ROW(5,esi,SIZEOF_FAST_FLOAT)]
+       fxch    st0,st5
+       fstp    FP64 [tmp]      ; spill one even-part result to free an x87 slot
+
+       fld     st1     ; st1 = st1 + st0, st0 = st1 - st0
+       fsub    st0,st1
+       fxch    st0,st1
+       faddp   st2,st0
+       fld     st5     ; st4 = st4 + st5, st5 = st4 - st5
+       fsubr   st0,st5
+       fxch    st0,st6
+       faddp   st5,st0
+
+       fld     st1     ; st1 = st1 + st4, st4 = st1 - st4
+       fsub    st0,st5
+       fxch    st0,st5
+       faddp   st2,st0
+
+       fld     st5
+       fadd    st0,st1
+       fxch    st0,st5
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_414)]
+       fxch    st0,st5
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_847)]
+       fxch    st0,st6
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_2_613)]
+       fxch    st0,st1
+       fmul    ROTATOR_TYPE [GOTOFF(ebx,F_1_082)]
+       fxch    st0,st6
+       fsubr   st1,st0
+       fsubp   st6,st0
+
+       ; -- Final output stage
+
+       sub     esp, byte DCTSIZE*SIZEOF_FP32   ; reserve 8 float slots; popped by the loop below
+
+       fsub    st0,st1
+       fld     st2     ; st1 = st2 + st1, st2 = st2 - st1
+       fsub    st0,st2
+       fxch    st0,st3
+       faddp   st2,st0
+       fsub    st4,st0
+       fld     st3     ; st0 = st3 + st0, st3 = st3 - st0
+       fsub    st0,st1
+       fxch    st0,st4
+       faddp   st1,st0
+
+       fld     FP32 [rndint_magic]     ; st0 = rounding magic
+
+       fadd    st4,st0                 ; bias four results with the magic
+       fadd    st1,st0
+       fadd    st2,st0
+       fadd    st3,st0
+
+       fxch    st0,st4
+
+       fstp    FP32 [esp+6*SIZEOF_FP32]        ; biased outputs 6,1,0,7
+       fstp    FP32 [esp+1*SIZEOF_FP32]
+       fstp    FP32 [esp+0*SIZEOF_FP32]
+       fstp    FP32 [esp+7*SIZEOF_FP32]
+
+       fxch    st0,st1
+
+       fadd    st2,st0
+       fld     FP64 [tmp]      ; reload the spilled even-part result
+       fld     st1     ; st4 = st4 + st1, st1 = st4 - st1
+       fsubr   st0,st5
+       fxch    st0,st2
+       faddp   st5,st0
+       fld     st0     ; st0 = st0 + st3, st3 = st0 - st3
+       fsub    st0,st4
+       fxch    st0,st4
+       faddp   st1,st0
+
+       fxch    st0,st2
+
+       fadd    st1,st0                 ; bias the remaining four results
+       fadd    st2,st0
+       fadd    st3,st0
+       faddp   st4,st0                 ; last use of the magic: add and pop it
+
+       fstp    FP32 [esp+5*SIZEOF_FP32]        ; biased outputs 5,4,3,2
+       fstp    FP32 [esp+4*SIZEOF_FP32]
+       fstp    FP32 [esp+3*SIZEOF_FP32]
+       fstp    FP32 [esp+2*SIZEOF_FP32]
+
+%assign i 0    ; i=0;
+%rep 4 ; -- repeat 4 times ---
+       pop     eax                     ; raw bits of the next biased output
+       pop     ebx                     ; and the one after (ebx reloaded at .rowDCT)
+       and     eax,RANGE_MASK          ; keep the bits used as table index
+       and     ebx,RANGE_MASK
+       mov     al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]    ; range-limit via table lookup
+       mov     bl, JSAMPLE [edx+ebx*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [edi+(i+0)*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+(i+1)*SIZEOF_JSAMPLE], bl
+%assign i i+2  ; i+=2;
+%endrep        ; -- repeat end ---
+
+.nextrow:
+       pop     edi                             ; restore (JSAMPROW *) saved at .rowloop
+       add     esi, byte DCTSIZE*SIZEOF_FAST_FLOAT     ; next workspace row
+       add     edi, byte SIZEOF_JSAMPROW       ; advance pointer to next row
+       dec     ecx
+       jnz     near .rowloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; discard gotptr, rndint_magic, workspace and tmp
+       pop     ebp
+       ret
+
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jidctfst.asm b/jidctfst.asm
new file mode 100644 (file)
index 0000000..8022ac6
--- /dev/null
@@ -0,0 +1,464 @@
+;
+; jidctfst.asm - fast integer IDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, but less accurate, integer implementation of
+; the inverse DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jidctfst.c; see jidctfst.c
+; for more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; descale reg32, N -- arithmetic right shift of a 32-bit register by N bits.
+; The rounding bias (1 << (N-1)) is added only when USE_ACCURATE_ROUNDING is
+; defined; leaving it out saves one instruction per descale, at the price of
+; an incorrectly rounded result about half the time.
+%macro descale 2
+%ifdef USE_ACCURATE_ROUNDING
+%if (%2)>7
+       add     %1, (1<<((%2)-1))       ; bias needs a 32-bit immediate
+%else
+       add     %1, byte (1<<((%2)-1))  ; bias <= 64 fits a signed imm8
+%endif
+%endif ; USE_ACCURATE_ROUNDING
+       sar     %1,%2
+%endmacro
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     8               ; fraction bits of the F_x_xxx multipliers below
+%define PASS1_BITS     2               ; must equal IFAST_SCALE_BITS (checked below); pass 2 descales by PASS1_BITS+3
+
+%if IFAST_SCALE_BITS != PASS1_BITS
+%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
+%endif
+
+%if CONST_BITS == 8
+F_1_082        equ     277             ; FIX(1.082392200)
+F_1_414        equ     362             ; FIX(1.414213562)
+F_1_847        equ     473             ; FIX(1.847759065)
+F_2_613        equ     669             ; FIX(2.613125930)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value is given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; right shift by n with rounding
+F_1_082        equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414        equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613        equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+; Pass 1: columns of coef_block -> workspace[].  Pass 2: rows of workspace[] -> output rows.
+; GLOBAL(void)
+; jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                  JCOEFPTR coef_block,
+;                  JSAMPARRAY output_buf, JDIMENSION output_col)
+; Arguments are read from the stack via ebp.  ebx/esi/edi/ebp are saved and restored; eax/ecx/edx are not.
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define range_limit    ebp-SIZEOF_POINTER              ; JSAMPLE * range_limit
+%define ptr            range_limit-SIZEOF_POINTER      ; void * ptr (scratch slot: wsptr in pass 1, outptr in pass 2)
+%define workspace      ptr-DCTSIZE2*SIZEOF_INT
+                                       ; int workspace[DCTSIZE2]
+
+       align   16
+       global  EXTN(jpeg_idct_ifast)
+
+EXTN(jpeg_idct_ifast):
+       push    ebp
+       mov     ebp,esp                 ; ebp = frame base; the arguments start at ebp+8
+       lea     esp, [workspace]        ; reserve range_limit, ptr and workspace[] below ebp
+       push    ebx                     ; registers saved here are restored before ret
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+       mov     edx, POINTER [compptr(ebp)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+       lea     edi, [workspace]                        ; int * wsptr
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.columnloop:
+       mov     ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]     ; OR together terms 1..7 of this column
+       or      ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT                        ; early out: a nonzero AC term was found
+
+       mov     bx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       mov     ax, JCOEF [COL(4,esi,SIZEOF_JCOEF)]
+       or      bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       or      bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       or      ax,bx
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       mov     ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       imul    ax, IFAST_MULT_TYPE [COL(0,edx,SIZEOF_IFAST_MULT_TYPE)] ; dequantize the DC term (16-bit multiply)
+       cwde                                            ; sign-extend ax into eax
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], eax        ; the same value fills all 8 entries of the column
+       mov     INT [COL(1,edi,SIZEOF_INT)], eax
+       mov     INT [COL(2,edi,SIZEOF_INT)], eax
+       mov     INT [COL(3,edi,SIZEOF_INT)], eax
+       mov     INT [COL(4,edi,SIZEOF_INT)], eax
+       mov     INT [COL(5,edi,SIZEOF_INT)], eax
+       mov     INT [COL(6,edi,SIZEOF_INT)], eax
+       mov     INT [COL(7,edi,SIZEOF_INT)], eax
+       jmp     near .nextcolumn                        ; esi/edx/edi/ecx are untouched on this path
+       alignx  16,7
+
+.columnDCT:
+       push    ecx     ; ctr
+       push    esi     ; coef_block
+       push    edx     ; quantptr
+
+       mov     POINTER [ptr], edi      ; wsptr
+
+       ; -- Even part
+
+       movsx   eax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       movsx   ecx, JCOEF [COL(4,esi,SIZEOF_JCOEF)]
+       imul    ax, IFAST_MULT_TYPE [COL(0,edx,SIZEOF_IFAST_MULT_TYPE)] ; dequantize; a 16-bit imul updates only ax, so the
+       imul    cx, IFAST_MULT_TYPE [COL(4,edx,SIZEOF_IFAST_MULT_TYPE)] ; upper half keeps the sign extension made by movsx
+       movsx   ebx, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       movsx   edi, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       imul    bx, IFAST_MULT_TYPE [COL(2,edx,SIZEOF_IFAST_MULT_TYPE)] ; NOTE(review): presumably relies on each product
+       imul    di, IFAST_MULT_TYPE [COL(6,edx,SIZEOF_IFAST_MULT_TYPE)] ; fitting in a signed 16-bit value -- confirm
+
+       lea     edx,[eax+ecx]           ; edx=tmp10
+       sub     eax,ecx                 ; eax=tmp11
+
+       lea     ecx,[ebx+edi]           ; ecx=tmp13
+       sub     ebx,edi                 ; ebx=(term 2)-(term 6)
+       imul    ebx,(F_1_414)           ; scaled by 2^CONST_BITS after this multiply
+       descale ebx,CONST_BITS          ; shift the fraction bits back out
+       sub     ebx,ecx                 ; ebx=tmp12
+
+       lea     edi,[edx+ecx]           ; edi=tmp0
+       sub     edx,ecx                 ; edx=tmp3
+       lea     ecx,[eax+ebx]           ; ecx=tmp1
+       sub     eax,ebx                 ; eax=tmp2
+
+       push    edx             ; tmp3
+       push    eax             ; tmp2
+       push    ecx             ; tmp1
+       push    edi             ; tmp0
+
+       ; -- Odd part
+
+       mov     edx, POINTER [esp+16]   ; quantptr
+
+       movsx   eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       movsx   ebx, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       imul    ax, IFAST_MULT_TYPE [COL(1,edx,SIZEOF_IFAST_MULT_TYPE)] ; same 16-bit dequantize pattern as the even part
+       imul    bx, IFAST_MULT_TYPE [COL(7,edx,SIZEOF_IFAST_MULT_TYPE)]
+       movsx   edi, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       movsx   ecx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       imul    di, IFAST_MULT_TYPE [COL(5,edx,SIZEOF_IFAST_MULT_TYPE)]
+       imul    cx, IFAST_MULT_TYPE [COL(3,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+       lea     esi,[eax+ebx]           ; esi=z11
+       sub     eax,ebx                 ; eax=z12
+       lea     edx,[edi+ecx]           ; edx=z13
+       sub     edi,ecx                 ; edi=z10
+
+       lea     ebx,[esi+edx]           ; ebx=tmp7
+       sub     esi,edx                 ; esi=z11-z13
+       imul    esi,(F_1_414)           ; esi=tmp11
+       descale esi,CONST_BITS
+
+       lea     ecx,[edi+eax]           ; ecx=z10+z12
+       imul    ecx,(F_1_847)           ; ecx=z5
+       imul    edi,(-F_2_613)          ; edi=MULTIPLY(z10,-FIX_2_613125930)
+       imul    eax,(F_1_082)           ; eax=MULTIPLY(z12,FIX_1_082392200)
+       descale ecx,CONST_BITS
+       descale edi,CONST_BITS
+       descale eax,CONST_BITS
+       add     edi,ecx                 ; edi=tmp12
+       sub     eax,ecx                 ; eax=tmp10
+
+       ; -- Final output stage
+
+       sub     edi,ebx         ; edi=tmp6
+       pop     edx             ; edx=tmp0
+       sub     esi,edi         ; esi=tmp5
+       pop     ecx             ; ecx=tmp1
+       add     eax,esi         ; eax=tmp4
+       push    esi             ; tmp5
+       push    eax             ; tmp4
+
+       lea     eax,[edx+ebx]   ; eax=data0(=tmp0+tmp7)
+       sub     edx,ebx         ; edx=data7(=tmp0-tmp7)
+       lea     ebx,[ecx+edi]   ; ebx=data1(=tmp1+tmp6)
+       sub     ecx,edi         ; ecx=data6(=tmp1-tmp6)
+
+       mov     edi, POINTER [ptr]      ; edi=wsptr
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], eax
+       mov     INT [COL(7,edi,SIZEOF_INT)], edx
+       mov     INT [COL(1,edi,SIZEOF_INT)], ebx
+       mov     INT [COL(6,edi,SIZEOF_INT)], ecx
+
+       pop     esi             ; esi=tmp4
+       pop     eax             ; eax=tmp5
+       pop     edx             ; edx=tmp2
+       pop     ecx             ; ecx=tmp3
+
+       lea     ebx,[edx+eax]   ; ebx=data2(=tmp2+tmp5)
+       sub     edx,eax         ; edx=data5(=tmp2-tmp5)
+       lea     eax,[ecx+esi]   ; eax=data4(=tmp3+tmp4)
+       sub     ecx,esi         ; ecx=data3(=tmp3-tmp4)
+
+       mov     INT [COL(2,edi,SIZEOF_INT)], ebx
+       mov     INT [COL(5,edi,SIZEOF_INT)], edx
+       mov     INT [COL(4,edi,SIZEOF_INT)], eax
+       mov     INT [COL(3,edi,SIZEOF_INT)], ecx
+
+       pop     edx     ; quantptr
+       pop     esi     ; coef_block
+       pop     ecx     ; ctr
+
+.nextcolumn:
+       add     esi, byte SIZEOF_JCOEF  ; advance pointers to next column
+       add     edx, byte SIZEOF_IFAST_MULT_TYPE
+       add     edi, byte SIZEOF_INT
+       dec     ecx                     ; one column done; loop until ctr reaches 0
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, POINTER [jdstruct_sample_range_limit(eax)]
+       sub     eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit
+       mov     POINTER [range_limit], eax      ; kept in the frame; reloaded wherever a lookup is needed
+
+       lea     esi, [workspace]                        ; int * wsptr
+       mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.rowloop:
+       push    edi                                     ; save the (JSAMPROW *) cursor; popped at .nextrow
+       mov     edi, JSAMPROW [edi]                     ; (JSAMPLE *)
+       add     edi, JDIMENSION [output_col(ebp)]       ; edi=outptr
+
+%ifndef NO_ZERO_ROW_TEST
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]        ; OR together terms 1..7 of this row
+       or      eax, INT [ROW(2,esi,SIZEOF_INT)]
+       jnz     short .rowDCT                           ; early out: a nonzero AC term was found
+
+       mov     ebx, INT [ROW(3,esi,SIZEOF_INT)]
+       mov     eax, INT [ROW(4,esi,SIZEOF_INT)]
+       or      ebx, INT [ROW(5,esi,SIZEOF_INT)]
+       or      eax, INT [ROW(6,esi,SIZEOF_INT)]
+       or      ebx, INT [ROW(7,esi,SIZEOF_INT)]
+       or      eax,ebx
+       jnz     short .rowDCT
+
+       ; -- AC terms all zero
+
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+       mov     edx, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale eax,(PASS1_BITS+3)              ; final scaling of the DC-only row
+       and     eax,RANGE_MASK                  ; reduce to a valid index into the range-limit table
+       mov     al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]    ; table lookup gives the output sample
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al      ; the same sample fills all 8 outputs of the row
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+2*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+3*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+4*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+5*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+6*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+7*SIZEOF_JSAMPLE], al
+       jmp     near .nextrow                   ; esi and ecx are untouched on this path
+       alignx  16,7
+%endif
+.rowDCT:
+       push    esi     ; wsptr
+       push    ecx     ; ctr
+
+       mov     POINTER [ptr], edi      ; outptr
+
+       ; -- Even part
+
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(2,esi,SIZEOF_INT)]
+       mov     ecx, INT [ROW(4,esi,SIZEOF_INT)]
+       mov     edi, INT [ROW(6,esi,SIZEOF_INT)]
+
+       lea     edx,[eax+ecx]           ; edx=tmp10
+       sub     eax,ecx                 ; eax=tmp11
+
+       lea     ecx,[ebx+edi]           ; ecx=tmp13
+       sub     ebx,edi                 ; ebx=(term 2)-(term 6)
+       imul    ebx,(F_1_414)
+       descale ebx,CONST_BITS
+       sub     ebx,ecx                 ; ebx=tmp12
+
+       lea     edi,[edx+ecx]           ; edi=tmp0
+       sub     edx,ecx                 ; edx=tmp3
+       lea     ecx,[eax+ebx]           ; ecx=tmp1
+       sub     eax,ebx                 ; eax=tmp2
+
+       push    edx             ; tmp3
+       push    eax             ; tmp2
+       push    ecx             ; tmp1
+       push    edi             ; tmp0
+
+       ; -- Odd part
+
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]
+       mov     ecx, INT [ROW(3,esi,SIZEOF_INT)]
+       mov     edi, INT [ROW(5,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(7,esi,SIZEOF_INT)]
+
+       lea     esi,[eax+ebx]           ; esi=z11
+       sub     eax,ebx                 ; eax=z12
+       lea     edx,[edi+ecx]           ; edx=z13
+       sub     edi,ecx                 ; edi=z10
+
+       lea     ebx,[esi+edx]           ; ebx=tmp7
+       sub     esi,edx                 ; esi=z11-z13
+       imul    esi,(F_1_414)           ; esi=tmp11
+       descale esi,CONST_BITS
+
+       lea     ecx,[edi+eax]           ; ecx=z10+z12
+       imul    ecx,(F_1_847)           ; ecx=z5
+       imul    edi,(-F_2_613)          ; edi=MULTIPLY(z10,-FIX_2_613125930)
+       imul    eax,(F_1_082)           ; eax=MULTIPLY(z12,FIX_1_082392200)
+       descale ecx,CONST_BITS
+       descale edi,CONST_BITS
+       descale eax,CONST_BITS
+       add     edi,ecx                 ; edi=tmp12
+       sub     eax,ecx                 ; eax=tmp10
+
+       ; -- Final output stage
+
+       sub     edi,ebx         ; edi=tmp6
+       pop     edx             ; edx=tmp0
+       sub     esi,edi         ; esi=tmp5
+       pop     ecx             ; ecx=tmp1
+       add     eax,esi         ; eax=tmp4
+       push    esi             ; tmp5
+       push    eax             ; tmp4
+
+       lea     eax,[edx+ebx]   ; eax=data0(=tmp0+tmp7)
+       sub     edx,ebx         ; edx=data7(=tmp0-tmp7)
+       lea     ebx,[ecx+edi]   ; ebx=data1(=tmp1+tmp6)
+       sub     ecx,edi         ; ecx=data6(=tmp1-tmp6)
+
+       mov     esi, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale eax,(PASS1_BITS+3)
+       descale edx,(PASS1_BITS+3)
+       descale ebx,(PASS1_BITS+3)
+       descale ecx,(PASS1_BITS+3)
+
+       mov     edi, POINTER [ptr]              ; edi=outptr
+
+       and     eax,RANGE_MASK                  ; reduce each value to a valid table index
+       and     edx,RANGE_MASK
+       and     ebx,RANGE_MASK
+       and     ecx,RANGE_MASK
+
+       mov     al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE]    ; range-limit table lookups
+       mov     dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE]
+       mov     bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE]
+       mov     cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE]
+
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+7*SIZEOF_JSAMPLE], dl
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], bl
+       mov     JSAMPLE [edi+6*SIZEOF_JSAMPLE], cl
+
+       pop     esi             ; esi=tmp4
+       pop     eax             ; eax=tmp5
+       pop     edx             ; edx=tmp2
+       pop     ecx             ; ecx=tmp3
+
+       lea     ebx,[edx+eax]   ; ebx=data2(=tmp2+tmp5)
+       sub     edx,eax         ; edx=data5(=tmp2-tmp5)
+       lea     eax,[ecx+esi]   ; eax=data4(=tmp3+tmp4)
+       sub     ecx,esi         ; ecx=data3(=tmp3-tmp4)
+
+       mov     esi, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale ebx,(PASS1_BITS+3)
+       descale edx,(PASS1_BITS+3)
+       descale eax,(PASS1_BITS+3)
+       descale ecx,(PASS1_BITS+3)
+
+       and     ebx,RANGE_MASK
+       and     edx,RANGE_MASK
+       and     eax,RANGE_MASK
+       and     ecx,RANGE_MASK
+
+       mov     bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE]
+       mov     dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE]
+       mov     al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE]
+       mov     cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE]
+
+       mov     JSAMPLE [edi+2*SIZEOF_JSAMPLE], bl
+       mov     JSAMPLE [edi+5*SIZEOF_JSAMPLE], dl
+       mov     JSAMPLE [edi+4*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+3*SIZEOF_JSAMPLE], cl
+
+       pop     ecx     ; ctr
+       pop     esi     ; wsptr
+
+.nextrow:
+       pop     edi                             ; edi=(JSAMPROW *) cursor saved at .rowloop
+       add     esi, byte DCTSIZE*SIZEOF_INT    ; advance pointer to next row
+       add     edi, byte SIZEOF_JSAMPROW
+       dec     ecx                             ; one row done; loop until ctr reaches 0
+       jnz     near .rowloop
+
+       pop     edi                     ; restore the registers saved in the prologue
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp                 ; drop the local frame (range_limit, ptr, workspace[])
+       pop     ebp
+       ret
+
+%endif ; DCT_IFAST_SUPPORTED
diff --git a/jidctint.asm b/jidctint.asm
new file mode 100644 (file)
index 0000000..eb81919
--- /dev/null
@@ -0,0 +1,524 @@
+;
+; jidctint.asm - accurate integer IDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; inverse DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jidctint.c; see jidctint.c for
+; more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_ISLOW_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; descale reg32, N -- divide a DWORD that is scaled by N bits by 2^N with
+; rounding: add half the divisor (1 << (N-1)), then shift right arithmetically.
+%macro descale 2
+%if (%2)>7
+       add     %1, (1<<((%2)-1))       ; bias needs a 32-bit immediate
+%else
+       add     %1, byte (1<<((%2)-1))  ; bias <= 64 fits a signed imm8
+%endif
+       sar     %1,%2
+%endmacro
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     13              ; fraction bits of the F_x_xxx multipliers below
+%define PASS1_BITS     2               ; extra fraction bits kept in the pass-1 (column) results
+
+%if CONST_BITS == 13
+F_0_298        equ      2446           ; FIX(0.298631336)
+F_0_390        equ      3196           ; FIX(0.390180644)
+F_0_541        equ      4433           ; FIX(0.541196100)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_175        equ      9633           ; FIX(1.175875602)
+F_1_501        equ     12299           ; FIX(1.501321110)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_1_961        equ     16069           ; FIX(1.961570560)
+F_2_053        equ     16819           ; FIX(2.053119869)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_072        equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value is given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; right shift by n with rounding
+F_0_298        equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390        equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175        equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501        equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961        equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053        equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072        equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                  JCOEFPTR coef_block,
+;                  JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define range_limit    ebp-SIZEOF_POINTER              ; JSAMPLE * range_limit
+%define ptr            range_limit-SIZEOF_POINTER      ; void * ptr
+%define workspace      ptr-DCTSIZE2*SIZEOF_INT
+                                       ; int workspace[DCTSIZE2]
+
+       align   16
+       global  EXTN(jpeg_idct_islow)
+
+EXTN(jpeg_idct_islow):
+       push    ebp
+       mov     ebp,esp
+       lea     esp, [workspace]
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+       mov     edx, POINTER [compptr(ebp)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+       lea     edi, [workspace]                        ; int * wsptr
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.columnloop:
+       mov     ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT
+
+       mov     bx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       mov     ax, JCOEF [COL(4,esi,SIZEOF_JCOEF)]
+       or      bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       or      bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       or      ax,bx
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       mov     ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       cwde
+
+       sal     eax,PASS1_BITS
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], eax
+       mov     INT [COL(1,edi,SIZEOF_INT)], eax
+       mov     INT [COL(2,edi,SIZEOF_INT)], eax
+       mov     INT [COL(3,edi,SIZEOF_INT)], eax
+       mov     INT [COL(4,edi,SIZEOF_INT)], eax
+       mov     INT [COL(5,edi,SIZEOF_INT)], eax
+       mov     INT [COL(6,edi,SIZEOF_INT)], eax
+       mov     INT [COL(7,edi,SIZEOF_INT)], eax
+       jmp     near .nextcolumn
+       alignx  16,7
+
+.columnDCT:
+       push    ecx     ; ctr
+       push    esi     ; coef_block
+       push    edx     ; quantptr
+
+       mov     POINTER [ptr], edi      ; wsptr
+
+       ; -- Even part
+
+       movsx   eax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       movsx   ecx, JCOEF [COL(4,esi,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    cx, ISLOW_MULT_TYPE [COL(4,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movsx   ebx, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       movsx   edi, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       imul    bx, ISLOW_MULT_TYPE [COL(2,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    di, ISLOW_MULT_TYPE [COL(6,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       lea     edx,[eax+ecx]
+       sub     eax,ecx
+       sal     edx,CONST_BITS  ; edx=tmp0
+       sal     eax,CONST_BITS  ; eax=tmp1
+
+       lea     ecx,[ebx+edi]
+       imul    ecx,(F_0_541)   ; ecx=z1
+       imul    ebx,(F_0_765)   ; ebx=MULTIPLY(z2,FIX_0_765366865)
+       imul    edi,(-F_1_847)  ; edi=MULTIPLY(z3,-FIX_1_847759065)
+       add     ebx,ecx         ; ebx=tmp3
+       add     edi,ecx         ; edi=tmp2
+
+       lea     ecx,[edx+ebx]   ; ecx=tmp10
+       sub     edx,ebx         ; edx=tmp13
+       lea     ebx,[eax+edi]   ; ebx=tmp11
+       sub     eax,edi         ; eax=tmp12
+
+       push    edx             ; tmp13
+       push    eax             ; tmp12
+       push    ebx             ; tmp11
+       push    ecx             ; tmp10
+
+       ; -- Odd part
+
+       mov     edx, POINTER [esp+16]   ; quantptr
+
+       movsx   eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       movsx   edi, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    di, ISLOW_MULT_TYPE [COL(3,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movsx   ecx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       movsx   ebx, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       imul    cx, ISLOW_MULT_TYPE [COL(5,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    bx, ISLOW_MULT_TYPE [COL(7,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       push    eax             ; eax=tmp3
+       push    edi             ; edi=tmp2
+       push    ecx             ; ecx=tmp1
+       push    ebx             ; ebx=tmp0
+
+       lea     esi,[ebx+edi]   ; esi=z3
+       lea     edx,[ecx+eax]   ; edx=z4
+       add     ebx,eax         ; ebx=z1
+       add     ecx,edi         ; ecx=z2
+
+       lea     eax,[esi+edx]
+       imul    eax,(F_1_175)   ; eax=z5
+
+       imul    esi,(-F_1_961)  ; esi=z3(=MULTIPLY(z3,-FIX_1_961570560))
+       imul    edx,(-F_0_390)  ; edx=z4(=MULTIPLY(z4,-FIX_0_390180644))
+       imul    ebx,(-F_0_899)  ; ebx=z1(=MULTIPLY(z1,-FIX_0_899976223))
+       imul    ecx,(-F_2_562)  ; ecx=z2(=MULTIPLY(z2,-FIX_2_562915447))
+
+       add     esi,eax         ; esi=z3(=z3+z5)
+       add     edx,eax         ; edx=z4(=z4+z5)
+
+       lea     edi,[esi+ebx]   ; edi=z1+z3
+       lea     eax,[edx+ecx]   ; eax=z2+z4
+       add     esi,ecx         ; esi=z2+z3
+       add     edx,ebx         ; edx=z1+z4
+
+       pop     ecx             ; ecx=tmp0
+       pop     ebx             ; ebx=tmp1
+       imul    ecx,(F_0_298)   ; ecx=tmp0(=MULTIPLY(tmp0,FIX_0_298631336))
+       imul    ebx,(F_2_053)   ; ebx=tmp1(=MULTIPLY(tmp1,FIX_2_053119869))
+       add     edi,ecx         ; edi=tmp0(=tmp0+z1+z3)
+       add     eax,ebx         ; eax=tmp1(=tmp1+z2+z4)
+
+       pop     ecx             ; ecx=tmp2
+       pop     ebx             ; ebx=tmp3
+       imul    ecx,(F_3_072)   ; ecx=tmp2(=MULTIPLY(tmp2,FIX_3_072711026))
+       imul    ebx,(F_1_501)   ; ebx=tmp3(=MULTIPLY(tmp3,FIX_1_501321110))
+       add     esi,ecx         ; esi=tmp2(=tmp2+z2+z3)
+       add     edx,ebx         ; edx=tmp3(=tmp3+z1+z4)
+
+       ; -- Final output stage
+
+       pop     ecx             ; ecx=tmp10
+       pop     ebx             ; ebx=tmp11
+       push    eax             ; tmp1
+       push    edi             ; tmp0
+
+       lea     eax,[ecx+edx]   ; eax=data0(=tmp10+tmp3)
+       sub     ecx,edx         ; ecx=data7(=tmp10-tmp3)
+       lea     edx,[ebx+esi]   ; edx=data1(=tmp11+tmp2)
+       sub     ebx,esi         ; ebx=data6(=tmp11-tmp2)
+
+       mov     edi, POINTER [ptr]      ; edi=wsptr
+
+       descale eax,(CONST_BITS-PASS1_BITS)
+       descale ecx,(CONST_BITS-PASS1_BITS)
+       descale edx,(CONST_BITS-PASS1_BITS)
+       descale ebx,(CONST_BITS-PASS1_BITS)
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], eax
+       mov     INT [COL(7,edi,SIZEOF_INT)], ecx
+       mov     INT [COL(1,edi,SIZEOF_INT)], edx
+       mov     INT [COL(6,edi,SIZEOF_INT)], ebx
+
+       pop     esi             ; esi=tmp0
+       pop     eax             ; eax=tmp1
+       pop     ecx             ; ecx=tmp12
+       pop     edx             ; edx=tmp13
+
+       lea     ebx,[ecx+eax]   ; ebx=data2(=tmp12+tmp1)
+       sub     ecx,eax         ; ecx=data5(=tmp12-tmp1)
+       lea     eax,[edx+esi]   ; eax=data3(=tmp13+tmp0)
+       sub     edx,esi         ; edx=data4(=tmp13-tmp0)
+
+       descale ebx,(CONST_BITS-PASS1_BITS)
+       descale ecx,(CONST_BITS-PASS1_BITS)
+       descale eax,(CONST_BITS-PASS1_BITS)
+       descale edx,(CONST_BITS-PASS1_BITS)
+
+       mov     INT [COL(2,edi,SIZEOF_INT)], ebx
+       mov     INT [COL(5,edi,SIZEOF_INT)], ecx
+       mov     INT [COL(3,edi,SIZEOF_INT)], eax
+       mov     INT [COL(4,edi,SIZEOF_INT)], edx
+
+       pop     edx     ; quantptr
+       pop     esi     ; coef_block
+       pop     ecx     ; ctr
+
+.nextcolumn:
+       add     esi, byte SIZEOF_JCOEF  ; advance pointers to next column
+       add     edx, byte SIZEOF_ISLOW_MULT_TYPE
+       add     edi, byte SIZEOF_INT
+       dec     ecx
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, POINTER [jdstruct_sample_range_limit(eax)]
+       sub     eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit
+       mov     POINTER [range_limit], eax
+
+       lea     esi, [workspace]                        ; int * wsptr
+       mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.rowloop:
+       push    edi
+       mov     edi, JSAMPROW [edi]                     ; (JSAMPLE *)
+       add     edi, JDIMENSION [output_col(ebp)]       ; edi=outptr
+
+%ifndef NO_ZERO_ROW_TEST
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]
+       or      eax, INT [ROW(2,esi,SIZEOF_INT)]
+       jnz     short .rowDCT
+
+       mov     ebx, INT [ROW(3,esi,SIZEOF_INT)]
+       mov     eax, INT [ROW(4,esi,SIZEOF_INT)]
+       or      ebx, INT [ROW(5,esi,SIZEOF_INT)]
+       or      eax, INT [ROW(6,esi,SIZEOF_INT)]
+       or      ebx, INT [ROW(7,esi,SIZEOF_INT)]
+       or      eax,ebx
+       jnz     short .rowDCT
+
+       ; -- AC terms all zero
+
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+       mov     edx, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale eax,(PASS1_BITS+3)
+       and     eax,RANGE_MASK
+       mov     al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+2*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+3*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+4*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+5*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+6*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+7*SIZEOF_JSAMPLE], al
+       jmp     near .nextrow
+       alignx  16,7
+%endif
+.rowDCT:
+       push    esi     ; wsptr
+       push    ecx     ; ctr
+
+       mov     POINTER [ptr], edi      ; outptr
+
+       ; -- Even part
+
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(2,esi,SIZEOF_INT)]
+       mov     ecx, INT [ROW(4,esi,SIZEOF_INT)]
+       mov     edi, INT [ROW(6,esi,SIZEOF_INT)]
+
+       lea     edx,[eax+ecx]
+       sub     eax,ecx
+       sal     edx,CONST_BITS  ; edx=tmp0
+       sal     eax,CONST_BITS  ; eax=tmp1
+
+       lea     ecx,[ebx+edi]
+       imul    ecx,(F_0_541)   ; ecx=z1
+       imul    ebx,(F_0_765)   ; ebx=MULTIPLY(z2,FIX_0_765366865)
+       imul    edi,(-F_1_847)  ; edi=MULTIPLY(z3,-FIX_1_847759065)
+       add     ebx,ecx         ; ebx=tmp3
+       add     edi,ecx         ; edi=tmp2
+
+       lea     ecx,[edx+ebx]   ; ecx=tmp10
+       sub     edx,ebx         ; edx=tmp13
+       lea     ebx,[eax+edi]   ; ebx=tmp11
+       sub     eax,edi         ; eax=tmp12
+
+       push    edx             ; tmp13
+       push    eax             ; tmp12
+       push    ebx             ; tmp11
+       push    ecx             ; tmp10
+
+       ; -- Odd part
+
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]
+       mov     edi, INT [ROW(3,esi,SIZEOF_INT)]
+       mov     ecx, INT [ROW(5,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(7,esi,SIZEOF_INT)]
+
+       push    eax             ; eax=tmp3
+       push    edi             ; edi=tmp2
+       push    ecx             ; ecx=tmp1
+       push    ebx             ; ebx=tmp0
+
+       lea     esi,[ebx+edi]   ; esi=z3
+       lea     edx,[ecx+eax]   ; edx=z4
+       add     ebx,eax         ; ebx=z1
+       add     ecx,edi         ; ecx=z2
+
+       lea     eax,[esi+edx]
+       imul    eax,(F_1_175)   ; eax=z5
+
+       imul    esi,(-F_1_961)  ; esi=z3(=MULTIPLY(z3,-FIX_1_961570560))
+       imul    edx,(-F_0_390)  ; edx=z4(=MULTIPLY(z4,-FIX_0_390180644))
+       imul    ebx,(-F_0_899)  ; ebx=z1(=MULTIPLY(z1,-FIX_0_899976223))
+       imul    ecx,(-F_2_562)  ; ecx=z2(=MULTIPLY(z2,-FIX_2_562915447))
+
+       add     esi,eax         ; esi=z3(=z3+z5)
+       add     edx,eax         ; edx=z4(=z4+z5)
+
+       lea     edi,[esi+ebx]   ; edi=z1+z3
+       lea     eax,[edx+ecx]   ; eax=z2+z4
+       add     esi,ecx         ; esi=z2+z3
+       add     edx,ebx         ; edx=z1+z4
+
+       pop     ecx             ; ecx=tmp0
+       pop     ebx             ; ebx=tmp1
+       imul    ecx,(F_0_298)   ; ecx=tmp0(=MULTIPLY(tmp0,FIX_0_298631336))
+       imul    ebx,(F_2_053)   ; ebx=tmp1(=MULTIPLY(tmp1,FIX_2_053119869))
+       add     edi,ecx         ; edi=tmp0(=tmp0+z1+z3)
+       add     eax,ebx         ; eax=tmp1(=tmp1+z2+z4)
+
+       pop     ecx             ; ecx=tmp2
+       pop     ebx             ; ebx=tmp3
+       imul    ecx,(F_3_072)   ; ecx=tmp2(=MULTIPLY(tmp2,FIX_3_072711026))
+       imul    ebx,(F_1_501)   ; ebx=tmp3(=MULTIPLY(tmp3,FIX_1_501321110))
+       add     esi,ecx         ; esi=tmp2(=tmp2+z2+z3)
+       add     edx,ebx         ; edx=tmp3(=tmp3+z1+z4)
+
+       ; -- Final output stage
+
+       pop     ecx             ; ecx=tmp10
+       pop     ebx             ; ebx=tmp11
+       push    eax             ; tmp1
+       push    edi             ; tmp0
+
+       lea     eax,[ecx+edx]   ; eax=data0(=tmp10+tmp3)
+       sub     ecx,edx         ; ecx=data7(=tmp10-tmp3)
+       lea     edx,[ebx+esi]   ; edx=data1(=tmp11+tmp2)
+       sub     ebx,esi         ; ebx=data6(=tmp11-tmp2)
+
+       mov     esi, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale eax,(CONST_BITS+PASS1_BITS+3)
+       descale ecx,(CONST_BITS+PASS1_BITS+3)
+       descale edx,(CONST_BITS+PASS1_BITS+3)
+       descale ebx,(CONST_BITS+PASS1_BITS+3)
+
+       mov     edi, POINTER [ptr]              ; edi=outptr
+
+       and     eax,RANGE_MASK
+       and     ecx,RANGE_MASK
+       and     edx,RANGE_MASK
+       and     ebx,RANGE_MASK
+
+       mov     al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE]
+       mov     cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE]
+       mov     dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE]
+       mov     bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE]
+
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+7*SIZEOF_JSAMPLE], cl
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], dl
+       mov     JSAMPLE [edi+6*SIZEOF_JSAMPLE], bl
+
+       pop     esi             ; esi=tmp0
+       pop     eax             ; eax=tmp1
+       pop     ecx             ; ecx=tmp12
+       pop     edx             ; edx=tmp13
+
+       lea     ebx,[ecx+eax]   ; ebx=data2(=tmp12+tmp1)
+       sub     ecx,eax         ; ecx=data5(=tmp12-tmp1)
+       lea     eax,[edx+esi]   ; eax=data3(=tmp13+tmp0)
+       sub     edx,esi         ; edx=data4(=tmp13-tmp0)
+
+       mov     esi, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale ebx,(CONST_BITS+PASS1_BITS+3)
+       descale ecx,(CONST_BITS+PASS1_BITS+3)
+       descale eax,(CONST_BITS+PASS1_BITS+3)
+       descale edx,(CONST_BITS+PASS1_BITS+3)
+
+       and     ebx,RANGE_MASK
+       and     ecx,RANGE_MASK
+       and     eax,RANGE_MASK
+       and     edx,RANGE_MASK
+
+       mov     bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE]
+       mov     cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE]
+       mov     al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE]
+       mov     dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE]
+
+       mov     JSAMPLE [edi+2*SIZEOF_JSAMPLE], bl
+       mov     JSAMPLE [edi+5*SIZEOF_JSAMPLE], cl
+       mov     JSAMPLE [edi+3*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+4*SIZEOF_JSAMPLE], dl
+
+       pop     ecx     ; ctr
+       pop     esi     ; wsptr
+
+.nextrow:
+       pop     edi
+       add     esi, byte DCTSIZE*SIZEOF_INT    ; advance pointer to next row
+       add     edi, byte SIZEOF_JSAMPROW
+       dec     ecx
+       jnz     near .rowloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp
+       pop     ebp
+       ret
+
+%endif ; DCT_ISLOW_SUPPORTED
diff --git a/jidctred.asm b/jidctred.asm
new file mode 100644 (file)
index 0000000..4463bfb
--- /dev/null
@@ -0,0 +1,688 @@
+;
+; jidctred.asm - reduced-size IDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size output:
+; either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef IDCT_SCALING_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; descale reg32, N
+;   reg32 = (reg32 + 2^(N-1)) >> N      (arithmetic right shift)
+; Removes N fractional bits from a DWORD fixed-point value, rounding half up.
+;
+%macro descale 2
+%if (%2)>7
+       add     %1, (1<<((%2)-1))       ; rounding term needs an imm32
+%else
+       add     %1, byte (1<<((%2)-1))  ; rounding term (<=64) fits an imm8
+%endif
+       sar     %1,%2
+%endmacro
+
+; --------------------------------------------------------------------------
+
+; Fixed-point parameters used by every routine in this file.
+;   CONST_BITS : number of fractional bits in the F_x_xxx multipliers below.
+;   PASS1_BITS : number of extra fractional bits kept in the values that
+;                pass 1 stores into the workspace (pass 1 shifts/descales
+;                relative to it, pass 2 removes it again).
+;
+%define CONST_BITS     13
+%define PASS1_BITS     2
+
+; F_a_bcd is FIX(a.bcd...) = a.bcd... * 2^CONST_BITS rounded to the nearest
+; integer.  For the default CONST_BITS (13) the values are spelled out
+; directly; for any other setting they are derived from the same constants
+; pre-scaled by 2^30.
+;
+%if CONST_BITS == 13
+F_0_211        equ      1730           ; FIX(0.211164243)
+F_0_509        equ      4176           ; FIX(0.509795579)
+F_0_601        equ      4926           ; FIX(0.601344887)
+F_0_720        equ      5906           ; FIX(0.720959822)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_850        equ      6967           ; FIX(0.850430095)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_061        equ      8697           ; FIX(1.061594337)
+F_1_272        equ     10426           ; FIX(1.272758580)
+F_1_451        equ     11893           ; FIX(1.451774981)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_2_172        equ     17799           ; FIX(2.172734803)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_624        equ     29692           ; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; Each integer literal below is the constant named in the trailing comment
+; multiplied by 2^30; DESCALE(x,n) drops n bits with rounding, leaving
+; CONST_BITS fractional bits.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_211        equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509        equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601        equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720        equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850        equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061        equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272        equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451        equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172        equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624        equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 4x4 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                JCOEFPTR coef_block,
+;                JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Pass 1 walks the DCTSIZE input columns (column 4 is skipped), multiplies
+; each coefficient it uses by the matching entry of compptr's dct_table and
+; stores four intermediate values per column in the on-stack workspace.
+; Pass 2 converts each of the DCTSIZE/2 workspace rows into four samples,
+; each fetched from the cinfo sample_range_limit table (offset by
+; CENTERJSAMPLE) at index (value & RANGE_MASK), and writes them to
+; output_buf[row] + output_col.
+;
+; Arguments are read from the stack relative to ebp.  ebx, esi and edi are
+; saved and restored; eax, ecx and edx are clobbered.
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+; Locals, addressed downward from ebp: one pointer slot, then the workspace.
+%define range_limit    ebp-SIZEOF_POINTER      ; JSAMPLE * range_limit
+%define workspace      range_limit-(DCTSIZE*4)*SIZEOF_INT
+                                       ; int workspace[DCTSIZE*4]
+
+       align   16
+       global  EXTN(jpeg_idct_4x4)
+
+EXTN(jpeg_idct_4x4):
+       push    ebp
+       mov     ebp,esp
+       lea     esp, [workspace]        ; reserve range_limit + workspace
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; ---- Pass 1: process columns from input, store into work array.
+       ;
+       ; Loop registers: edx=quantptr, esi=inptr, edi=wsptr, ecx=ctr.
+       ; ctr counts down from DCTSIZE, so the current column index is
+       ; DCTSIZE-ctr.
+
+       mov     edx, POINTER [compptr(ebp)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+       lea     edi, [workspace]                        ; int * wsptr
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.columnloop:
+       ; Don't bother to process column 4, because second pass won't use it
+       ; (ctr == DCTSIZE-4  <=>  column index 4)
+       cmp     ecx, byte DCTSIZE-4
+       je      near .nextcolumn
+
+       ; OR together input terms 1,2,3,5,6,7 of this column; if they are
+       ; all zero the DC-only shortcut below applies.
+       mov     ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT
+
+       mov     ax, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       mov     bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       or      bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       or      ax,bx
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero; we need not examine term 4 for 4x4 output
+
+       ; Dequantize term 0 (16-bit multiply), sign-extend the 16-bit
+       ; result to 32 bits and scale it up by PASS1_BITS; the same value is
+       ; stored for all four outputs of this column.
+       mov     ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       cwde
+
+       sal     eax, PASS1_BITS
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], eax
+       mov     INT [COL(1,edi,SIZEOF_INT)], eax
+       mov     INT [COL(2,edi,SIZEOF_INT)], eax
+       mov     INT [COL(3,edi,SIZEOF_INT)], eax
+       jmp     near .nextcolumn
+       alignx  16,7
+
+.columnDCT:
+       ; All four loop registers are reused as scratch below, so park them
+       ; on the stack first.
+       push    ecx     ; ctr
+       push    esi     ; coef_block
+       push    edx     ; quantptr
+       push    edi     ; wsptr
+
+       ; -- Even part
+
+       ; Each coefficient is sign-extended to 32 bits (movsx) and then
+       ; dequantized with a 16-bit imul, which rewrites only the low word
+       ; of the register; the high word keeps the sign extension of the raw
+       ; coefficient.
+       ; NOTE(review): this matches the full 32-bit product only if the
+       ; product fits in 16 bits with that same sign extension -- confirm
+       ; the expected coefficient/multiplier ranges.
+       movsx   ebx, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+       movsx   ecx, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+       movsx   eax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       imul    bx, ISLOW_MULT_TYPE [COL(2,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    cx, ISLOW_MULT_TYPE [COL(6,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       imul    ebx,(F_1_847)           ; ebx=MULTIPLY(z2,FIX_1_847759065)
+       imul    ecx,(-F_0_765)          ; ecx=MULTIPLY(z3,-FIX_0_765366865)
+       sal     eax,(CONST_BITS+1)      ; eax=tmp0
+       add     ecx,ebx                 ; ecx=tmp2
+
+       lea     edi,[eax+ecx]           ; edi=tmp10
+       sub     eax,ecx                 ; eax=tmp12
+
+       ; Keep the even-part results on the stack while the odd part uses
+       ; every register.
+       push    eax             ; tmp12
+       push    edi             ; tmp10
+
+       ; -- Odd part
+
+       ; Same movsx + 16-bit imul dequantization as in the even part.
+       movsx   edi, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       movsx   ecx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       imul    di, ISLOW_MULT_TYPE [COL(7,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    cx, ISLOW_MULT_TYPE [COL(5,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movsx   ebx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       movsx   eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       imul    bx, ISLOW_MULT_TYPE [COL(3,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    ax, ISLOW_MULT_TYPE [COL(1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; From here on esi and edx no longer hold inptr/quantptr.
+       mov     esi,edi         ; esi=edi=z1
+       mov     edx,ecx         ; edx=ecx=z2
+       imul    edi,(-F_0_211)  ; edi=MULTIPLY(z1,-FIX_0_211164243)
+       imul    ecx,(F_1_451)   ; ecx=MULTIPLY(z2,FIX_1_451774981)
+       imul    esi,(-F_0_509)  ; esi=MULTIPLY(z1,-FIX_0_509795579)
+       imul    edx,(-F_0_601)  ; edx=MULTIPLY(z2,-FIX_0_601344887)
+
+       add     edi,ecx         ; edi=(tmp0)
+       add     esi,edx         ; esi=(tmp2)
+
+       mov     ecx,ebx         ; ecx=ebx=z3
+       mov     edx,eax         ; edx=eax=z4
+       imul    ebx,(-F_2_172)  ; ebx=MULTIPLY(z3,-FIX_2_172734803)
+       imul    eax,(F_1_061)   ; eax=MULTIPLY(z4,FIX_1_061594337)
+       imul    ecx,(F_0_899)   ; ecx=MULTIPLY(z3,FIX_0_899976223)
+       imul    edx,(F_2_562)   ; edx=MULTIPLY(z4,FIX_2_562915447)
+
+       add     edi,ebx
+       add     esi,ecx
+       add     edi,eax         ; edi=tmp0
+       add     esi,edx         ; esi=tmp2
+
+       ; -- Final output stage
+
+       pop     ebx             ; ebx=tmp10
+       pop     ecx             ; ecx=tmp12
+
+       lea     eax,[ebx+esi]   ; eax=data0(=tmp10+tmp2)
+       sub     ebx,esi         ; ebx=data3(=tmp10-tmp2)
+       lea     edx,[ecx+edi]   ; edx=data1(=tmp12+tmp0)
+       sub     ecx,edi         ; ecx=data2(=tmp12-tmp0)
+
+       pop     edi     ; wsptr
+
+       ; Drop all but PASS1_BITS of the fractional bits (with rounding).
+       descale eax,(CONST_BITS-PASS1_BITS+1)
+       descale ebx,(CONST_BITS-PASS1_BITS+1)
+       descale edx,(CONST_BITS-PASS1_BITS+1)
+       descale ecx,(CONST_BITS-PASS1_BITS+1)
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], eax
+       mov     INT [COL(3,edi,SIZEOF_INT)], ebx
+       mov     INT [COL(1,edi,SIZEOF_INT)], edx
+       mov     INT [COL(2,edi,SIZEOF_INT)], ecx
+
+       ; Restore the remaining loop registers (reverse of the pushes above).
+       pop     edx     ; quantptr
+       pop     esi     ; coef_block
+       pop     ecx     ; ctr
+
+.nextcolumn:
+       add     esi, byte SIZEOF_JCOEF  ; advance pointers to next column
+       add     edx, byte SIZEOF_ISLOW_MULT_TYPE
+       add     edi, byte SIZEOF_INT
+       dec     ecx
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process 4 rows from work array, store into output array.
+
+       ; range_limit = cinfo's sample_range_limit + CENTERJSAMPLE, cached in
+       ; the local slot.  The addition is written as "sub -imm"; presumably
+       ; so that the immediate fits the signed-byte form -- confirm against
+       ; the value of CENTERJSAMPLE.
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, POINTER [jdstruct_sample_range_limit(eax)]
+       sub     eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit
+       mov     POINTER [range_limit], eax
+
+       ; Loop registers: esi=wsptr, edi=current entry of output_buf, ecx=ctr.
+       lea     esi, [workspace]                        ; int * wsptr
+       mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     ecx, DCTSIZE/2                          ; ctr
+       alignx  16,7
+.rowloop:
+       push    edi             ; keep the output_buf entry pointer for .nextrow
+       mov     edi, JSAMPROW [edi]                     ; (JSAMPLE *)
+       add     edi, JDIMENSION [output_col(ebp)]       ; edi=outptr
+
+%ifndef NO_ZERO_ROW_TEST
+       ; OR together workspace terms 1,2,3,5,6,7 of this row; if they are
+       ; all zero the DC-only shortcut below applies.
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]
+       or      eax, INT [ROW(2,esi,SIZEOF_INT)]
+       jnz     short .rowDCT
+
+       mov     eax, INT [ROW(3,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(5,esi,SIZEOF_INT)]
+       or      eax, INT [ROW(6,esi,SIZEOF_INT)]
+       or      ebx, INT [ROW(7,esi,SIZEOF_INT)]
+       or      eax,ebx
+       jnz     short .rowDCT
+
+       ; -- AC terms all zero
+
+       ; One sample, derived from term 0 alone, fills all four outputs.
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+       mov     edx, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale eax,(PASS1_BITS+3)
+       and     eax,RANGE_MASK
+       mov     al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+2*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+3*SIZEOF_JSAMPLE], al
+       jmp     near .nextrow
+       alignx  16,7
+%endif
+.rowDCT:
+       ; esi, ecx and edi are reused as scratch below.
+       push    esi     ; wsptr
+       push    ecx     ; ctr
+       push    edi     ; outptr
+
+       ; -- Even part
+
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(2,esi,SIZEOF_INT)]
+       mov     ecx, INT [ROW(6,esi,SIZEOF_INT)]
+
+       imul    ebx,(F_1_847)           ; ebx=MULTIPLY(z2,FIX_1_847759065)
+       imul    ecx,(-F_0_765)          ; ecx=MULTIPLY(z3,-FIX_0_765366865)
+       sal     eax,(CONST_BITS+1)      ; eax=tmp0
+       add     ecx,ebx                 ; ecx=tmp2
+
+       lea     edi,[eax+ecx]           ; edi=tmp10
+       sub     eax,ecx                 ; eax=tmp12
+
+       push    eax             ; tmp12
+       push    edi             ; tmp10
+
+       ; -- Odd part
+
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(3,esi,SIZEOF_INT)]
+       mov     ecx, INT [ROW(5,esi,SIZEOF_INT)]
+       mov     edi, INT [ROW(7,esi,SIZEOF_INT)]
+
+       ; From here on esi no longer holds wsptr.
+       mov     esi,edi         ; esi=edi=z1
+       mov     edx,ecx         ; edx=ecx=z2
+       imul    edi,(-F_0_211)  ; edi=MULTIPLY(z1,-FIX_0_211164243)
+       imul    ecx,(F_1_451)   ; ecx=MULTIPLY(z2,FIX_1_451774981)
+       imul    esi,(-F_0_509)  ; esi=MULTIPLY(z1,-FIX_0_509795579)
+       imul    edx,(-F_0_601)  ; edx=MULTIPLY(z2,-FIX_0_601344887)
+
+       add     edi,ecx         ; edi=(tmp0)
+       add     esi,edx         ; esi=(tmp2)
+
+       mov     ecx,ebx         ; ecx=ebx=z3
+       mov     edx,eax         ; edx=eax=z4
+       imul    ebx,(-F_2_172)  ; ebx=MULTIPLY(z3,-FIX_2_172734803)
+       imul    eax,(F_1_061)   ; eax=MULTIPLY(z4,FIX_1_061594337)
+       imul    ecx,(F_0_899)   ; ecx=MULTIPLY(z3,FIX_0_899976223)
+       imul    edx,(F_2_562)   ; edx=MULTIPLY(z4,FIX_2_562915447)
+
+       add     edi,ebx
+       add     esi,ecx
+       add     edi,eax         ; edi=tmp0
+       add     esi,edx         ; esi=tmp2
+
+       ; -- Final output stage
+
+       pop     ebx             ; ebx=tmp10
+       pop     ecx             ; ecx=tmp12
+
+       lea     eax,[ebx+esi]   ; eax=data0(=tmp10+tmp2)
+       sub     ebx,esi         ; ebx=data3(=tmp10-tmp2)
+       lea     edx,[ecx+edi]   ; edx=data1(=tmp12+tmp0)
+       sub     ecx,edi         ; ecx=data2(=tmp12-tmp0)
+
+       mov     esi, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale eax,(CONST_BITS+PASS1_BITS+3+1)
+       descale ebx,(CONST_BITS+PASS1_BITS+3+1)
+       descale edx,(CONST_BITS+PASS1_BITS+3+1)
+       descale ecx,(CONST_BITS+PASS1_BITS+3+1)
+
+       pop     edi     ; outptr
+
+       ; Mask each value down to a table index, then replace it with the
+       ; sample stored at that index.
+       and     eax,RANGE_MASK
+       and     ebx,RANGE_MASK
+       and     edx,RANGE_MASK
+       and     ecx,RANGE_MASK
+
+       mov     al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE]
+       mov     bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE]
+       mov     dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE]
+       mov     cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE]
+
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+3*SIZEOF_JSAMPLE], bl
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], dl
+       mov     JSAMPLE [edi+2*SIZEOF_JSAMPLE], cl
+
+       pop     ecx     ; ctr
+       pop     esi     ; wsptr
+
+.nextrow:
+       pop     edi                             ; back to the output_buf entry
+       add     esi, byte DCTSIZE*SIZEOF_INT    ; advance pointer to next row
+       add     edi, byte SIZEOF_JSAMPROW
+       dec     ecx
+       jnz     near .rowloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; discard workspace and range_limit
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                JCOEFPTR coef_block,
+;                JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Pass 1 walks the DCTSIZE input columns (columns 2, 4 and 6 are skipped),
+; multiplies terms 0,1,3,5,7 of each remaining column by the matching
+; entries of compptr's dct_table and stores two intermediate values per
+; column in the on-stack workspace.
+; Pass 2 converts each of the DCTSIZE/4 workspace rows into two samples,
+; each fetched from the cinfo sample_range_limit table (offset by
+; CENTERJSAMPLE) at index (value & RANGE_MASK), and writes them to
+; output_buf[row] + output_col.
+;
+; Arguments are read from the stack relative to ebp.  ebx, esi and edi are
+; saved and restored; eax, ecx and edx are clobbered.
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+; Locals, addressed downward from ebp: one pointer slot, then the workspace.
+%define range_limit    ebp-SIZEOF_POINTER      ; JSAMPLE * range_limit
+%define workspace      range_limit-(DCTSIZE*2)*SIZEOF_INT
+                                       ; int workspace[DCTSIZE*2]
+
+       align   16
+       global  EXTN(jpeg_idct_2x2)
+
+EXTN(jpeg_idct_2x2):
+       push    ebp
+       mov     ebp,esp
+       lea     esp, [workspace]        ; reserve range_limit + workspace
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       ; ---- Pass 1: process columns from input, store into work array.
+       ;
+       ; Loop registers: edx=quantptr, esi=inptr, edi=wsptr, ecx=ctr.
+       ; ctr counts down from DCTSIZE, so the current column index is
+       ; DCTSIZE-ctr.
+
+       mov     edx, POINTER [compptr(ebp)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+       lea     edi, [workspace]                        ; int * wsptr
+       mov     ecx, DCTSIZE                            ; ctr
+       alignx  16,7
+.columnloop:
+       ; Don't bother to process columns 2,4,6
+       ; (for ctr in 8..1, (ctr & 0x09) is zero only for ctr = 6, 4, 2,
+       ;  i.e. exactly for column indices 2, 4, 6)
+       test    ecx, 0x09
+       jz      near .nextcolumn
+
+       ; OR together input terms 1,3,5,7 of this column; if they are all
+       ; zero the DC-only shortcut below applies.
+       mov     ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT
+
+       mov     ax, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       or      ax, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero; we need not examine terms 2,4,6 for 2x2 output
+
+       ; Dequantize term 0 (16-bit multiply), sign-extend the 16-bit
+       ; result to 32 bits and scale it up by PASS1_BITS; the same value is
+       ; stored for both outputs of this column.
+       mov     ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       cwde
+
+       sal     eax, PASS1_BITS
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], eax
+       mov     INT [COL(1,edi,SIZEOF_INT)], eax
+       jmp     short .nextcolumn
+       alignx  16,7
+
+.columnDCT:
+       ; ecx and edi are reused as scratch below; esi and edx stay intact.
+       push    ecx     ; ctr
+       push    edi     ; wsptr
+
+       ; -- Odd part
+
+       ; Each coefficient is sign-extended to 32 bits (movsx) and then
+       ; dequantized with a 16-bit imul, which rewrites only the low word
+       ; of the register; the high word keeps the sign extension of the raw
+       ; coefficient.
+       ; NOTE(review): this matches the full 32-bit product only if the
+       ; product fits in 16 bits with that same sign extension -- confirm
+       ; the expected coefficient/multiplier ranges.
+       movsx   eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+       movsx   ebx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    bx, ISLOW_MULT_TYPE [COL(3,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movsx   ecx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+       movsx   edi, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+       imul    cx, ISLOW_MULT_TYPE [COL(5,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       imul    di, ISLOW_MULT_TYPE [COL(7,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       imul    eax,(F_3_624)   ; eax=MULTIPLY(data1,FIX_3_624509785)
+       imul    ebx,(-F_1_272)  ; ebx=MULTIPLY(data3,-FIX_1_272758580)
+       imul    ecx,(F_0_850)   ; ecx=MULTIPLY(data5,FIX_0_850430095)
+       imul    edi,(-F_0_720)  ; edi=MULTIPLY(data7,-FIX_0_720959822)
+
+       add     eax,ebx
+       add     ecx,edi
+       add     ecx,eax         ; ecx=tmp0
+
+       ; -- Even part
+
+       ; Term 0 is dequantized in 16 bits and then sign-extended (cwde),
+       ; as in the DC-only path above.
+       mov     ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       cwde
+
+       sal     eax,(CONST_BITS+2)      ; eax=tmp10
+
+       ; -- Final output stage
+
+       pop     edi     ; wsptr
+
+       lea     ebx,[eax+ecx]   ; ebx=data0(=tmp10+tmp0)
+       sub     eax,ecx         ; eax=data1(=tmp10-tmp0)
+
+       pop     ecx     ; ctr
+
+       ; Drop all but PASS1_BITS of the fractional bits (with rounding).
+       descale ebx,(CONST_BITS-PASS1_BITS+2)
+       descale eax,(CONST_BITS-PASS1_BITS+2)
+
+       mov     INT [COL(0,edi,SIZEOF_INT)], ebx
+       mov     INT [COL(1,edi,SIZEOF_INT)], eax
+
+.nextcolumn:
+       add     esi, byte SIZEOF_JCOEF  ; advance pointers to next column
+       add     edx, byte SIZEOF_ISLOW_MULT_TYPE
+       add     edi, byte SIZEOF_INT
+       dec     ecx
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process 2 rows from work array, store into output array.
+
+       ; range_limit = cinfo's sample_range_limit + CENTERJSAMPLE, cached in
+       ; the local slot.  The addition is written as "sub -imm"; presumably
+       ; so that the immediate fits the signed-byte form -- confirm against
+       ; the value of CENTERJSAMPLE.
+       mov     eax, POINTER [cinfo(ebp)]
+       mov     eax, POINTER [jdstruct_sample_range_limit(eax)]
+       sub     eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit
+       mov     POINTER [range_limit], eax
+
+       ; Loop registers: esi=wsptr, edi=current entry of output_buf, ecx=ctr.
+       lea     esi, [workspace]                        ; int * wsptr
+       mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.rowloop:
+       push    edi             ; keep the output_buf entry pointer for .nextrow
+       mov     edi, JSAMPROW [edi]                     ; (JSAMPLE *)
+       add     edi, JDIMENSION [output_col(ebp)]       ; edi=outptr
+
+%ifndef NO_ZERO_ROW_TEST
+       ; OR together workspace terms 1,3,5,7 of this row; if they are all
+       ; zero the DC-only shortcut below applies.
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]
+       or      eax, INT [ROW(3,esi,SIZEOF_INT)]
+       jnz     short .rowDCT
+
+       mov     eax, INT [ROW(5,esi,SIZEOF_INT)]
+       or      eax, INT [ROW(7,esi,SIZEOF_INT)]
+       jnz     short .rowDCT
+
+       ; -- AC terms all zero
+
+       ; One sample, derived from term 0 alone, fills both outputs.
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+       mov     edx, POINTER [range_limit]      ; (JSAMPLE *)
+
+       descale eax,(PASS1_BITS+3)
+       and     eax,RANGE_MASK
+       mov     al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+       jmp     short .nextrow
+       alignx  16,7
+%endif
+.rowDCT:
+       ; Only ecx needs saving here: esi and edi are left untouched below.
+       push    ecx     ; ctr
+
+       ; -- Odd part
+
+       mov     eax, INT [ROW(1,esi,SIZEOF_INT)]
+       mov     ebx, INT [ROW(3,esi,SIZEOF_INT)]
+       mov     ecx, INT [ROW(5,esi,SIZEOF_INT)]
+       mov     edx, INT [ROW(7,esi,SIZEOF_INT)]
+
+       imul    eax,(F_3_624)   ; eax=MULTIPLY(data1,FIX_3_624509785)
+       imul    ebx,(-F_1_272)  ; ebx=MULTIPLY(data3,-FIX_1_272758580)
+       imul    ecx,(F_0_850)   ; ecx=MULTIPLY(data5,FIX_0_850430095)
+       imul    edx,(-F_0_720)  ; edx=MULTIPLY(data7,-FIX_0_720959822)
+
+       add     eax,ebx
+       add     ecx,edx
+       add     ecx,eax         ; ecx=tmp0
+
+       ; -- Even part
+
+       mov     eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+       sal     eax,(CONST_BITS+2)      ; eax=tmp10
+
+       ; -- Final output stage
+
+       mov     edx, POINTER [range_limit]      ; (JSAMPLE *)
+
+       lea     ebx,[eax+ecx]   ; ebx=data0(=tmp10+tmp0)
+       sub     eax,ecx         ; eax=data1(=tmp10-tmp0)
+
+       pop     ecx     ; ctr
+
+       descale ebx,(CONST_BITS+PASS1_BITS+3+2)
+       descale eax,(CONST_BITS+PASS1_BITS+3+2)
+
+       ; Mask each value down to a table index, then replace it with the
+       ; sample stored at that index.
+       and     ebx,RANGE_MASK
+       and     eax,RANGE_MASK
+       mov     bl, JSAMPLE [edx+ebx*SIZEOF_JSAMPLE]
+       mov     al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]
+       mov     JSAMPLE [edi+0*SIZEOF_JSAMPLE], bl
+       mov     JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+
+.nextrow:
+       pop     edi                             ; back to the output_buf entry
+       add     esi, byte DCTSIZE*SIZEOF_INT    ; advance pointer to next row
+       add     edi, byte SIZEOF_JSAMPROW
+       dec     ecx
+       jnz     near .rowloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; discard workspace and range_limit
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 1x1 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                JCOEFPTR coef_block,
+;                JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Only term 0 of coef_block and of compptr's dct_table is read; the single
+; resulting sample is stored at output_buf[0][output_col].  cinfo is not
+; used.  No stack frame is built and nothing is pushed; eax, ecx and edx
+; are the only registers modified.
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+; The argument macros above add 8 to their base (return address plus a
+; pushed ebp).  This routine never pushes ebp, so the equivalent base is
+; esp-4; "ebp" is redefined so that the macros can be used unchanged.
+; NOTE(review): there is no matching %undef before the end of this file,
+; so any code added after this routine would see "esp-4" wherever it
+; writes "ebp".
+%define ebp            esp-4           ; use esp instead of ebp
+
+       align   16
+       global  EXTN(jpeg_idct_1x1)
+
+EXTN(jpeg_idct_1x1):
+;      push    ebp
+;      mov     ebp,esp
+;      push    ebx             ; unused
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+;      push    edi             ; unused
+
+       ; We hardly need an inverse DCT routine for this: just take the
+       ; average pixel value, which is one-eighth of the DC coefficient.
+
+       mov     edx, POINTER [compptr(ebp)]
+       mov     ecx, JCOEFPTR [coef_block(ebp)]         ; inptr
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+
+       ; Dequantize the DC term.  All arithmetic from here to the store is
+       ; done in the 16-bit register ax.
+       mov     ax, JCOEF [COL(0,ecx,SIZEOF_JCOEF)]
+       imul    ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       mov     ecx, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     edx, JDIMENSION [output_col(ebp)]
+       mov     ecx, JSAMPROW [ecx]                     ; (JSAMPLE *)
+
+       ; Add the rounding term for the shift by 3 together with
+       ; CENTERJSAMPLE pre-scaled by 8, so that after the shift the value
+       ; is (DC/8 rounded) + CENTERJSAMPLE.
+       add     ax, (1 << (3-1)) + (CENTERJSAMPLE << 3)
+       sar     ax,3            ; descale
+
+       ; ah == 0 means the value is already in 0..255.  Otherwise
+       ; not + sar 15 turns a negative value into 0x0000 and a positive
+       ; value above 255 into 0xFFFF, so al ends up 0 or 0xFF.
+       test    ah,ah           ; unsigned saturation
+       jz      short .output
+       not     ax
+       sar     ax,15
+       alignx  16,3
+.output:
+       mov     JSAMPLE [ecx+edx*SIZEOF_JSAMPLE], al
+
+;      pop     edi             ; unused
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+;      pop     ebx             ; unused
+;      pop     ebp
+       ret
+
+%endif ; IDCT_SCALING_SUPPORTED
diff --git a/jimmxfst.asm b/jimmxfst.asm
new file mode 100644 (file)
index 0000000..de0def6
--- /dev/null
@@ -0,0 +1,510 @@
+;
+; jimmxfst.asm - fast integer IDCT (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the inverse DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jidctfst.c; see the jidctfst.c
+; for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+%ifdef JIDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     8       ; fractional bits of the F_* constants; 14 is also OK.
+%define PASS1_BITS     2       ; pass 2 descales its results by PASS1_BITS+3
+
+%if IFAST_SCALE_BITS != PASS1_BITS
+%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
+%endif
+
+%if CONST_BITS == 8
+F_1_082        equ     277             ; FIX(1.082392200)
+F_1_414        equ     362             ; FIX(1.414213562)
+F_1_847        equ     473             ; FIX(1.847759065)
+F_2_613        equ     669             ; FIX(2.613125930)
+F_1_613        equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define        DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))  ; rounding right shift by n bits
+F_1_082        equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414        equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613        equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+F_1_613        equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+
+%define PRE_MULTIPLY_SCALE_BITS   2    ; left shift applied to the data operand before pmulhw
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)    ; left shift applied to each constant
+
+       alignz  16
+       global  EXTN(jconst_idct_ifast_mmx)
+
+EXTN(jconst_idct_ifast_mmx):
+
+PW_F1414       times 4 dw  F_1_414 << CONST_SHIFT      ; 4 words of  1.414213562
+PW_F1847       times 4 dw  F_1_847 << CONST_SHIFT      ; 4 words of  1.847759065
+PW_MF1613      times 4 dw -F_1_613 << CONST_SHIFT      ; 4 words of -1.613125930
+PW_F1082       times 4 dw  F_1_082 << CONST_SHIFT      ; 4 words of  1.082392200
+PB_CENTERJSAMP times 8 db  CENTERJSAMPLE               ; 8 bytes, added to the packed samples in pass 2
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_ifast_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                      JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define original_ebp   ebp+0           ; slot where the prologue saves the unaligned frame base
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         2               ; number of mmword scratch slots
+%define workspace      wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                       ; JCOEF workspace[DCTSIZE2]
+
+       align   16
+       global  EXTN(jpeg_idct_ifast_mmx)
+
+EXTN(jpeg_idct_ifast_mmx):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4                     ; room for the saved frame base
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax                       ; [original_ebp] = unaligned frame base
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [workspace]                ; reserve wk[] and workspace[] below ebp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+;      mov     eax, [original_ebp]     ; (not needed: eax still holds it from the prologue)
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+       lea     edi, [workspace]                        ; JCOEF * wsptr
+       mov     ecx, DCTSIZE/4                          ; ctr (4 columns per iteration)
+       alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]      ; cheap scalar pre-test (clobbers eax)
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT
+
+       movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     mm1,mm0                 ; mm1 = OR of every AC term in these 4 columns
+       packsswb mm1,mm1                ; a nonzero word always packs to a nonzero byte
+       movd    eax,mm1
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]   ; dequantize the DC terms
+
+       movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+       punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+       punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
+
+       movq      mm1,mm0
+       punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+       punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+       movq      mm3,mm2
+       punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+       punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0     ; constant column: 8 copies of its DC
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+       movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
+       jmp     near .nextcolumn
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Even part
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]   ; mm0=in0 (dequantized)
+       pmullw  mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]   ; mm1=in2 (dequantized)
+       movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]   ; mm2=in4 (dequantized)
+       pmullw  mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]   ; mm3=in6 (dequantized)
+
+       movq    mm4,mm0
+       movq    mm5,mm1
+       psubw   mm0,mm2                 ; mm0=tmp11
+       psubw   mm1,mm3
+       paddw   mm4,mm2                 ; mm4=tmp10
+       paddw   mm5,mm3                 ; mm5=tmp13
+
+       psllw   mm1,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm1,[GOTOFF(ebx,PW_F1414)]      ; mm1=(in2-in6)*1.414213562
+       psubw   mm1,mm5                 ; mm1=tmp12
+
+       movq    mm6,mm4
+       movq    mm7,mm0
+       psubw   mm4,mm5                 ; mm4=tmp3
+       psubw   mm0,mm1                 ; mm0=tmp2
+       paddw   mm6,mm5                 ; mm6=tmp0
+       paddw   mm7,mm1                 ; mm7=tmp1
+
+       movq    MMWORD [wk(1)], mm4     ; wk(1)=tmp3
+       movq    MMWORD [wk(0)], mm0     ; wk(0)=tmp2
+
+       ; -- Odd part
+
+       movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       pmullw  mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       pmullw  mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+       movq    mm4,mm2
+       movq    mm0,mm5
+       psubw   mm2,mm1                 ; mm2=z12
+       psubw   mm5,mm3                 ; mm5=z10
+       paddw   mm4,mm1                 ; mm4=z11
+       paddw   mm0,mm3                 ; mm0=z13
+
+       movq    mm1,mm5                 ; mm1=z10(unscaled)
+       psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+       psllw   mm5,PRE_MULTIPLY_SCALE_BITS
+
+       movq    mm3,mm4
+       psubw   mm4,mm0
+       paddw   mm3,mm0                 ; mm3=tmp7
+
+       psllw   mm4,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm4,[GOTOFF(ebx,PW_F1414)]      ; mm4=tmp11
+
+       ; To avoid overflow...
+       ;
+       ; (Original)
+       ; tmp12 = -2.613125930 * z10 + z5;
+       ;
+       ; (This implementation)
+       ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+       ;       = -1.613125930 * z10 - z10 + z5;
+
+       movq    mm0,mm5
+       paddw   mm5,mm2
+       pmulhw  mm5,[GOTOFF(ebx,PW_F1847)]      ; mm5=z5
+       pmulhw  mm0,[GOTOFF(ebx,PW_MF1613)]     ; mm0=-1.613125930*z10
+       pmulhw  mm2,[GOTOFF(ebx,PW_F1082)]      ; mm2=1.082392200*z12
+       psubw   mm0,mm1                 ; mm0=-2.613125930*z10
+       psubw   mm2,mm5                 ; mm2=tmp10
+       paddw   mm0,mm5                 ; mm0=tmp12
+
+       ; -- Final output stage
+
+       psubw   mm0,mm3                 ; mm0=tmp6
+       movq    mm1,mm6
+       movq    mm5,mm7
+       paddw   mm6,mm3                 ; mm6=data0=(00 01 02 03)
+       paddw   mm7,mm0                 ; mm7=data1=(10 11 12 13)
+       psubw   mm1,mm3                 ; mm1=data7=(70 71 72 73)
+       psubw   mm5,mm0                 ; mm5=data6=(60 61 62 63)
+       psubw   mm4,mm0                 ; mm4=tmp5
+
+       movq      mm3,mm6               ; transpose coefficients(phase 1)
+       punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+       punpckhwd mm3,mm7               ; mm3=(02 12 03 13)
+       movq      mm0,mm5               ; transpose coefficients(phase 1)
+       punpcklwd mm5,mm1               ; mm5=(60 70 61 71)
+       punpckhwd mm0,mm1               ; mm0=(62 72 63 73)
+
+       movq    mm7, MMWORD [wk(0)]     ; mm7=tmp2
+       movq    mm1, MMWORD [wk(1)]     ; mm1=tmp3
+
+       movq    MMWORD [wk(0)], mm5     ; wk(0)=(60 70 61 71)
+       movq    MMWORD [wk(1)], mm0     ; wk(1)=(62 72 63 73)
+
+       paddw   mm2,mm4                 ; mm2=tmp4
+       movq    mm5,mm7
+       movq    mm0,mm1
+       paddw   mm7,mm4                 ; mm7=data2=(20 21 22 23)
+       paddw   mm1,mm2                 ; mm1=data4=(40 41 42 43)
+       psubw   mm5,mm4                 ; mm5=data5=(50 51 52 53)
+       psubw   mm0,mm2                 ; mm0=data3=(30 31 32 33)
+
+       movq      mm4,mm7               ; transpose coefficients(phase 1)
+       punpcklwd mm7,mm0               ; mm7=(20 30 21 31)
+       punpckhwd mm4,mm0               ; mm4=(22 32 23 33)
+       movq      mm2,mm1               ; transpose coefficients(phase 1)
+       punpcklwd mm1,mm5               ; mm1=(40 50 41 51)
+       punpckhwd mm2,mm5               ; mm2=(42 52 43 53)
+
+       movq      mm0,mm6               ; transpose coefficients(phase 2)
+       punpckldq mm6,mm7               ; mm6=(00 10 20 30)
+       punpckhdq mm0,mm7               ; mm0=(01 11 21 31)
+       movq      mm5,mm3               ; transpose coefficients(phase 2)
+       punpckldq mm3,mm4               ; mm3=(02 12 22 32)
+       punpckhdq mm5,mm4               ; mm5=(03 13 23 33)
+
+       movq    mm7, MMWORD [wk(0)]     ; mm7=(60 70 61 71)
+       movq    mm4, MMWORD [wk(1)]     ; mm4=(62 72 63 73)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3
+       movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
+
+       movq      mm6,mm1               ; transpose coefficients(phase 2)
+       punpckldq mm1,mm7               ; mm1=(40 50 60 70)
+       punpckhdq mm6,mm7               ; mm6=(41 51 61 71)
+       movq      mm0,mm2               ; transpose coefficients(phase 2)
+       punpckldq mm2,mm4               ; mm2=(42 52 62 72)
+       punpckhdq mm0,mm4               ; mm0=(43 53 63 73)
+
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6
+       movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0
+
+.nextcolumn:
+       add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+       add     edx, byte 4*SIZEOF_IFAST_MULT_TYPE      ; quantptr
+       add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+       dec     ecx                                     ; ctr
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, [original_ebp]                     ; reload: pass 1 may have clobbered eax
+       lea     esi, [workspace]                        ; JCOEF * wsptr
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]
+       mov     ecx, DCTSIZE/4                          ; ctr (4 output rows per iteration)
+       alignx  16,7
+.rowloop:
+
+       ; -- Even part
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+       movq    mm4,mm0
+       movq    mm5,mm1
+       psubw   mm0,mm2                 ; mm0=tmp11
+       psubw   mm1,mm3
+       paddw   mm4,mm2                 ; mm4=tmp10
+       paddw   mm5,mm3                 ; mm5=tmp13
+
+       psllw   mm1,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm1,[GOTOFF(ebx,PW_F1414)]
+       psubw   mm1,mm5                 ; mm1=tmp12
+
+       movq    mm6,mm4
+       movq    mm7,mm0
+       psubw   mm4,mm5                 ; mm4=tmp3
+       psubw   mm0,mm1                 ; mm0=tmp2
+       paddw   mm6,mm5                 ; mm6=tmp0
+       paddw   mm7,mm1                 ; mm7=tmp1
+
+       movq    MMWORD [wk(1)], mm4     ; wk(1)=tmp3
+       movq    MMWORD [wk(0)], mm0     ; wk(0)=tmp2
+
+       ; -- Odd part
+
+       movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+       movq    mm4,mm2
+       movq    mm0,mm5
+       psubw   mm2,mm1                 ; mm2=z12
+       psubw   mm5,mm3                 ; mm5=z10
+       paddw   mm4,mm1                 ; mm4=z11
+       paddw   mm0,mm3                 ; mm0=z13
+
+       movq    mm1,mm5                 ; mm1=z10(unscaled)
+       psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+       psllw   mm5,PRE_MULTIPLY_SCALE_BITS
+
+       movq    mm3,mm4
+       psubw   mm4,mm0
+       paddw   mm3,mm0                 ; mm3=tmp7
+
+       psllw   mm4,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  mm4,[GOTOFF(ebx,PW_F1414)]      ; mm4=tmp11
+
+       ; To avoid overflow...
+       ;
+       ; (Original)
+       ; tmp12 = -2.613125930 * z10 + z5;
+       ;
+       ; (This implementation)
+       ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+       ;       = -1.613125930 * z10 - z10 + z5;
+
+       movq    mm0,mm5
+       paddw   mm5,mm2
+       pmulhw  mm5,[GOTOFF(ebx,PW_F1847)]      ; mm5=z5
+       pmulhw  mm0,[GOTOFF(ebx,PW_MF1613)]     ; mm0=-1.613125930*z10
+       pmulhw  mm2,[GOTOFF(ebx,PW_F1082)]      ; mm2=1.082392200*z12
+       psubw   mm0,mm1                 ; mm0=-2.613125930*z10
+       psubw   mm2,mm5                 ; mm2=tmp10
+       paddw   mm0,mm5                 ; mm0=tmp12
+
+       ; -- Final output stage
+
+       psubw   mm0,mm3                 ; mm0=tmp6
+       movq    mm1,mm6
+       movq    mm5,mm7
+       paddw   mm6,mm3                 ; mm6=data0=(00 10 20 30)
+       paddw   mm7,mm0                 ; mm7=data1=(01 11 21 31)
+       psraw   mm6,(PASS1_BITS+3)      ; descale
+       psraw   mm7,(PASS1_BITS+3)      ; descale
+       psubw   mm1,mm3                 ; mm1=data7=(07 17 27 37)
+       psubw   mm5,mm0                 ; mm5=data6=(06 16 26 36)
+       psraw   mm1,(PASS1_BITS+3)      ; descale
+       psraw   mm5,(PASS1_BITS+3)      ; descale
+       psubw   mm4,mm0                 ; mm4=tmp5
+
+       packsswb  mm6,mm5               ; mm6=(00 10 20 30 06 16 26 36)
+       packsswb  mm7,mm1               ; mm7=(01 11 21 31 07 17 27 37)
+
+       movq    mm3, MMWORD [wk(0)]     ; mm3=tmp2
+       movq    mm0, MMWORD [wk(1)]     ; mm0=tmp3
+
+       paddw   mm2,mm4                 ; mm2=tmp4
+       movq    mm5,mm3
+       movq    mm1,mm0
+       paddw   mm3,mm4                 ; mm3=data2=(02 12 22 32)
+       paddw   mm0,mm2                 ; mm0=data4=(04 14 24 34)
+       psraw   mm3,(PASS1_BITS+3)      ; descale
+       psraw   mm0,(PASS1_BITS+3)      ; descale
+       psubw   mm5,mm4                 ; mm5=data5=(05 15 25 35)
+       psubw   mm1,mm2                 ; mm1=data3=(03 13 23 33)
+       psraw   mm5,(PASS1_BITS+3)      ; descale
+       psraw   mm1,(PASS1_BITS+3)      ; descale
+
+       movq      mm4,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm4=[PB_CENTERJSAMP]
+
+       packsswb  mm3,mm0               ; mm3=(02 12 22 32 04 14 24 34)
+       packsswb  mm1,mm5               ; mm1=(03 13 23 33 05 15 25 35)
+
+       paddb     mm6,mm4               ; add CENTERJSAMPLE to every packed byte
+       paddb     mm7,mm4
+       paddb     mm3,mm4
+       paddb     mm1,mm4
+
+       movq      mm2,mm6               ; transpose coefficients(phase 1)
+       punpcklbw mm6,mm7               ; mm6=(00 01 10 11 20 21 30 31)
+       punpckhbw mm2,mm7               ; mm2=(06 07 16 17 26 27 36 37)
+       movq      mm0,mm3               ; transpose coefficients(phase 1)
+       punpcklbw mm3,mm1               ; mm3=(02 03 12 13 22 23 32 33)
+       punpckhbw mm0,mm1               ; mm0=(04 05 14 15 24 25 34 35)
+
+       movq      mm5,mm6               ; transpose coefficients(phase 2)
+       punpcklwd mm6,mm3               ; mm6=(00 01 02 03 10 11 12 13)
+       punpckhwd mm5,mm3               ; mm5=(20 21 22 23 30 31 32 33)
+       movq      mm4,mm0               ; transpose coefficients(phase 2)
+       punpcklwd mm0,mm2               ; mm0=(04 05 06 07 14 15 16 17)
+       punpckhwd mm4,mm2               ; mm4=(24 25 26 27 34 35 36 37)
+
+       movq      mm7,mm6               ; transpose coefficients(phase 3)
+       punpckldq mm6,mm0               ; mm6=(00 01 02 03 04 05 06 07)
+       punpckhdq mm7,mm0               ; mm7=(10 11 12 13 14 15 16 17)
+       movq      mm1,mm5               ; transpose coefficients(phase 3)
+       punpckldq mm5,mm4               ; mm5=(20 21 22 23 24 25 26 27)
+       punpckhdq mm1,mm4               ; mm1=(30 31 32 33 34 35 36 37)
+
+       pushpic ebx                     ; save GOT address
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; ebx doubles as a row pointer here
+       movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6    ; 8 samples at output_col of row 0
+       movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7    ; row 1
+       mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5    ; row 2
+       movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1    ; row 3
+
+       poppic  ebx                     ; restore GOT address
+
+       add     esi, byte 4*SIZEOF_JCOEF        ; wsptr
+       add     edi, byte 4*SIZEOF_JSAMPROW     ; advance to the next 4 output row pointers
+       dec     ecx                             ; ctr
+       jnz     near .rowloop
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JIDCT_INT_MMX_SUPPORTED
+%endif ; DCT_IFAST_SUPPORTED
diff --git a/jimmxint.asm b/jimmxint.asm
new file mode 100644 (file)
index 0000000..2a33a63
--- /dev/null
@@ -0,0 +1,862 @@
+;
+; jimmxint.asm - accurate integer IDCT (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; inverse DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jidctint.c; see the jidctint.c for
+; more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_ISLOW_SUPPORTED
+%ifdef JIDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     13      ; fractional bits of the F_* fixed-point constants
+%define PASS1_BITS     2       ; pass-1 results are kept scaled up by 2^PASS1_BITS
+
+%define DESCALE_P1     (CONST_BITS-PASS1_BITS)         ; presumably the pass-1 descale shift (see PD_DESCALE_P1)
+%define DESCALE_P2     (CONST_BITS+PASS1_BITS+3)       ; presumably the pass-2 descale shift (see PD_DESCALE_P2)
+
+%if CONST_BITS == 13
+F_0_298        equ      2446           ; FIX(0.298631336)
+F_0_390        equ      3196           ; FIX(0.390180644)
+F_0_541        equ      4433           ; FIX(0.541196100)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_175        equ      9633           ; FIX(1.175875602)
+F_1_501        equ     12299           ; FIX(1.501321110)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_1_961        equ     16069           ; FIX(1.961570560)
+F_2_053        equ     16819           ; FIX(2.053119869)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_072        equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; rounding right shift by n bits
+F_0_298        equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390        equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175        equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501        equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961        equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053        equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072        equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_idct_islow_mmx)
+
+EXTN(jconst_idct_islow_mmx):
+
+PW_F130_F054   times 2 dw  (F_0_541+F_0_765), F_0_541  ; word pair for pmaddwd: tmp3 = z2*(0.541+0.765) + z3*0.541
+PW_F054_MF130  times 2 dw  F_0_541, (F_0_541-F_1_847)  ; word pair for pmaddwd: tmp2 = z2*0.541 + z3*(0.541-1.847)
+PW_MF078_F117  times 2 dw  (F_1_175-F_1_961), F_1_175  ; word pair for pmaddwd: z3 = z3*(1.175-1.961) + z4*1.175
+PW_F117_F078   times 2 dw  F_1_175, (F_1_175-F_0_390)  ; word pair for pmaddwd: z4 = z3*1.175 + z4*(1.175-0.390)
+PW_MF060_MF089 times 2 dw  (F_0_298-F_0_899),-F_0_899  ; coefficients (0.298-0.899), -0.899
+PW_MF089_F060  times 2 dw -F_0_899, (F_1_501-F_0_899)  ; coefficients -0.899, (1.501-0.899)
+PW_MF050_MF256 times 2 dw  (F_2_053-F_2_562),-F_2_562  ; coefficients (2.053-2.562), -2.562
+PW_MF256_F050  times 2 dw -F_2_562, (F_3_072-F_2_562)  ; coefficients -2.562, (3.072-2.562)
+PD_DESCALE_P1  times 2 dd  1 << (DESCALE_P1-1)         ; rounding term: half of 2^DESCALE_P1
+PD_DESCALE_P2  times 2 dd  1 << (DESCALE_P2-1)         ; rounding term: half of 2^DESCALE_P2
+PB_CENTERJSAMP times 8 db  CENTERJSAMPLE               ; 8 bytes of CENTERJSAMPLE
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_islow_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                      JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM         12
+%define workspace      wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                       ; JCOEF workspace[DCTSIZE2]
+
+       align   16
+       global  EXTN(jpeg_idct_islow_mmx)
+
+EXTN(jpeg_idct_islow_mmx):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [workspace]
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+;      mov     eax, [original_ebp]
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+       lea     edi, [workspace]                        ; JCOEF * wsptr
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT
+
+       movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     mm1,mm0
+       packsswb mm1,mm1
+       movd    eax,mm1
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       psllw   mm0,PASS1_BITS
+
+       movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+       punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+       punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
+
+       movq      mm1,mm0
+       punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+       punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+       movq      mm3,mm2
+       punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+       punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+       movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
+       jmp     near .nextcolumn
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Even part
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; (Original)
+       ; z1 = (z2 + z3) * 0.541196100;
+       ; tmp2 = z1 + z3 * -1.847759065;
+       ; tmp3 = z1 + z2 * 0.765366865;
+       ;
+       ; (This implementation)
+       ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+       ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+       movq      mm4,mm1               ; mm1=in2=z2
+       movq      mm5,mm1
+       punpcklwd mm4,mm3               ; mm3=in6=z3
+       punpckhwd mm5,mm3
+       movq      mm1,mm4
+       movq      mm3,mm5
+       pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=tmp3L
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]        ; mm5=tmp3H
+       pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=tmp2L
+       pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]       ; mm3=tmp2H
+
+       movq      mm6,mm0
+       paddw     mm0,mm2               ; mm0=in0+in4
+       psubw     mm6,mm2               ; mm6=in0-in4
+
+       pxor      mm7,mm7
+       pxor      mm2,mm2
+       punpcklwd mm7,mm0               ; mm7=tmp0L
+       punpckhwd mm2,mm0               ; mm2=tmp0H
+       psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+       psrad     mm2,(16-CONST_BITS)   ; psrad mm2,16 & pslld mm2,CONST_BITS
+
+       movq    mm0,mm7
+       paddd   mm7,mm4                 ; mm7=tmp10L
+       psubd   mm0,mm4                 ; mm0=tmp13L
+       movq    mm4,mm2
+       paddd   mm2,mm5                 ; mm2=tmp10H
+       psubd   mm4,mm5                 ; mm4=tmp13H
+
+       movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp10L
+       movq    MMWORD [wk(1)], mm2     ; wk(1)=tmp10H
+       movq    MMWORD [wk(2)], mm0     ; wk(2)=tmp13L
+       movq    MMWORD [wk(3)], mm4     ; wk(3)=tmp13H
+
+       pxor      mm5,mm5
+       pxor      mm7,mm7
+       punpcklwd mm5,mm6               ; mm5=tmp1L
+       punpckhwd mm7,mm6               ; mm7=tmp1H
+       psrad     mm5,(16-CONST_BITS)   ; psrad mm5,16 & pslld mm5,CONST_BITS
+       psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+
+       movq    mm2,mm5
+       paddd   mm5,mm1                 ; mm5=tmp11L
+       psubd   mm2,mm1                 ; mm2=tmp12L
+       movq    mm0,mm7
+       paddd   mm7,mm3                 ; mm7=tmp11H
+       psubd   mm0,mm3                 ; mm0=tmp12H
+
+       movq    MMWORD [wk(4)], mm5     ; wk(4)=tmp11L
+       movq    MMWORD [wk(5)], mm7     ; wk(5)=tmp11H
+       movq    MMWORD [wk(6)], mm2     ; wk(6)=tmp12L
+       movq    MMWORD [wk(7)], mm0     ; wk(7)=tmp12H
+
+       ; -- Odd part
+
+       movq    mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movq    mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       movq    mm5,mm6
+       movq    mm7,mm4
+       paddw   mm5,mm3                 ; mm5=z3
+       paddw   mm7,mm1                 ; mm7=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movq      mm2,mm5
+       movq      mm0,mm5
+       punpcklwd mm2,mm7
+       punpckhwd mm0,mm7
+       movq      mm5,mm2
+       movq      mm7,mm0
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]       ; mm2=z3L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]       ; mm0=z3H
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]        ; mm5=z4L
+       pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]        ; mm7=z4H
+
+       movq    MMWORD [wk(10)], mm2    ; wk(10)=z3L
+       movq    MMWORD [wk(11)], mm0    ; wk(11)=z3H
+
+       ; (Original)
+       ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+       ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+       ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+       ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+       ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+       ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+       ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+       ; tmp0 += z3;  tmp1 += z4;
+       ; tmp2 += z3;  tmp3 += z4;
+
+       movq      mm2,mm3
+       movq      mm0,mm3
+       punpcklwd mm2,mm4
+       punpckhwd mm0,mm4
+       movq      mm3,mm2
+       movq      mm4,mm0
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm2=tmp0L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm0=tmp0H
+       pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]       ; mm3=tmp3L
+       pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]       ; mm4=tmp3H
+
+       paddd   mm2, MMWORD [wk(10)]    ; mm2=tmp0L
+       paddd   mm0, MMWORD [wk(11)]    ; mm0=tmp0H
+       paddd   mm3,mm5                 ; mm3=tmp3L
+       paddd   mm4,mm7                 ; mm4=tmp3H
+
+       movq    MMWORD [wk(8)], mm2     ; wk(8)=tmp0L
+       movq    MMWORD [wk(9)], mm0     ; wk(9)=tmp0H
+
+       movq      mm2,mm1
+       movq      mm0,mm1
+       punpcklwd mm2,mm6
+       punpckhwd mm0,mm6
+       movq      mm1,mm2
+       movq      mm6,mm0
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm2=tmp1L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm0=tmp1H
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]       ; mm1=tmp2L
+       pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]       ; mm6=tmp2H
+
+       paddd   mm2,mm5                 ; mm2=tmp1L
+       paddd   mm0,mm7                 ; mm0=tmp1H
+       paddd   mm1, MMWORD [wk(10)]    ; mm1=tmp2L
+       paddd   mm6, MMWORD [wk(11)]    ; mm6=tmp2H
+
+       movq    MMWORD [wk(10)], mm2    ; wk(10)=tmp1L
+       movq    MMWORD [wk(11)], mm0    ; wk(11)=tmp1H
+
+       ; -- Final output stage
+
+       movq    mm5, MMWORD [wk(0)]     ; mm5=tmp10L
+       movq    mm7, MMWORD [wk(1)]     ; mm7=tmp10H
+
+       movq    mm2,mm5
+       movq    mm0,mm7
+       paddd   mm5,mm3                 ; mm5=data0L
+       paddd   mm7,mm4                 ; mm7=data0H
+       psubd   mm2,mm3                 ; mm2=data7L
+       psubd   mm0,mm4                 ; mm0=data7H
+
+       movq    mm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1]
+
+       paddd   mm5,mm3
+       paddd   mm7,mm3
+       psrad   mm5,DESCALE_P1
+       psrad   mm7,DESCALE_P1
+       paddd   mm2,mm3
+       paddd   mm0,mm3
+       psrad   mm2,DESCALE_P1
+       psrad   mm0,DESCALE_P1
+
+       packssdw  mm5,mm7               ; mm5=data0=(00 01 02 03)
+       packssdw  mm2,mm0               ; mm2=data7=(70 71 72 73)
+
+       movq    mm4, MMWORD [wk(4)]     ; mm4=tmp11L
+       movq    mm3, MMWORD [wk(5)]     ; mm3=tmp11H
+
+       movq    mm7,mm4
+       movq    mm0,mm3
+       paddd   mm4,mm1                 ; mm4=data1L
+       paddd   mm3,mm6                 ; mm3=data1H
+       psubd   mm7,mm1                 ; mm7=data6L
+       psubd   mm0,mm6                 ; mm0=data6H
+
+       movq    mm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm1=[PD_DESCALE_P1]
+
+       paddd   mm4,mm1
+       paddd   mm3,mm1
+       psrad   mm4,DESCALE_P1
+       psrad   mm3,DESCALE_P1
+       paddd   mm7,mm1
+       paddd   mm0,mm1
+       psrad   mm7,DESCALE_P1
+       psrad   mm0,DESCALE_P1
+
+       packssdw  mm4,mm3               ; mm4=data1=(10 11 12 13)
+       packssdw  mm7,mm0               ; mm7=data6=(60 61 62 63)
+
+       movq      mm6,mm5               ; transpose coefficients(phase 1)
+       punpcklwd mm5,mm4               ; mm5=(00 10 01 11)
+       punpckhwd mm6,mm4               ; mm6=(02 12 03 13)
+       movq      mm1,mm7               ; transpose coefficients(phase 1)
+       punpcklwd mm7,mm2               ; mm7=(60 70 61 71)
+       punpckhwd mm1,mm2               ; mm1=(62 72 63 73)
+
+       movq    mm3, MMWORD [wk(6)]     ; mm3=tmp12L
+       movq    mm0, MMWORD [wk(7)]     ; mm0=tmp12H
+       movq    mm4, MMWORD [wk(10)]    ; mm4=tmp1L
+       movq    mm2, MMWORD [wk(11)]    ; mm2=tmp1H
+
+       movq    MMWORD [wk(0)], mm5     ; wk(0)=(00 10 01 11)
+       movq    MMWORD [wk(1)], mm6     ; wk(1)=(02 12 03 13)
+       movq    MMWORD [wk(4)], mm7     ; wk(4)=(60 70 61 71)
+       movq    MMWORD [wk(5)], mm1     ; wk(5)=(62 72 63 73)
+
+       movq    mm5,mm3
+       movq    mm6,mm0
+       paddd   mm3,mm4                 ; mm3=data2L
+       paddd   mm0,mm2                 ; mm0=data2H
+       psubd   mm5,mm4                 ; mm5=data5L
+       psubd   mm6,mm2                 ; mm6=data5H
+
+       movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1]
+
+       paddd   mm3,mm7
+       paddd   mm0,mm7
+       psrad   mm3,DESCALE_P1
+       psrad   mm0,DESCALE_P1
+       paddd   mm5,mm7
+       paddd   mm6,mm7
+       psrad   mm5,DESCALE_P1
+       psrad   mm6,DESCALE_P1
+
+       packssdw  mm3,mm0               ; mm3=data2=(20 21 22 23)
+       packssdw  mm5,mm6               ; mm5=data5=(50 51 52 53)
+
+       movq    mm1, MMWORD [wk(2)]     ; mm1=tmp13L
+       movq    mm4, MMWORD [wk(3)]     ; mm4=tmp13H
+       movq    mm2, MMWORD [wk(8)]     ; mm2=tmp0L
+       movq    mm7, MMWORD [wk(9)]     ; mm7=tmp0H
+
+       movq    mm0,mm1
+       movq    mm6,mm4
+       paddd   mm1,mm2                 ; mm1=data3L
+       paddd   mm4,mm7                 ; mm4=data3H
+       psubd   mm0,mm2                 ; mm0=data4L
+       psubd   mm6,mm7                 ; mm6=data4H
+
+       movq    mm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1]
+
+       paddd   mm1,mm2
+       paddd   mm4,mm2
+       psrad   mm1,DESCALE_P1
+       psrad   mm4,DESCALE_P1
+       paddd   mm0,mm2
+       paddd   mm6,mm2
+       psrad   mm0,DESCALE_P1
+       psrad   mm6,DESCALE_P1
+
+       packssdw  mm1,mm4               ; mm1=data3=(30 31 32 33)
+       packssdw  mm0,mm6               ; mm0=data4=(40 41 42 43)
+
+       movq    mm7, MMWORD [wk(0)]     ; mm7=(00 10 01 11)
+       movq    mm2, MMWORD [wk(1)]     ; mm2=(02 12 03 13)
+
+       movq      mm4,mm3               ; transpose coefficients(phase 1)
+       punpcklwd mm3,mm1               ; mm3=(20 30 21 31)
+       punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+       movq      mm6,mm0               ; transpose coefficients(phase 1)
+       punpcklwd mm0,mm5               ; mm0=(40 50 41 51)
+       punpckhwd mm6,mm5               ; mm6=(42 52 43 53)
+
+       movq      mm1,mm7               ; transpose coefficients(phase 2)
+       punpckldq mm7,mm3               ; mm7=(00 10 20 30)
+       punpckhdq mm1,mm3               ; mm1=(01 11 21 31)
+       movq      mm5,mm2               ; transpose coefficients(phase 2)
+       punpckldq mm2,mm4               ; mm2=(02 12 22 32)
+       punpckhdq mm5,mm4               ; mm5=(03 13 23 33)
+
+       movq    mm3, MMWORD [wk(4)]     ; mm3=(60 70 61 71)
+       movq    mm4, MMWORD [wk(5)]     ; mm4=(62 72 63 73)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
+
+       movq      mm7,mm0               ; transpose coefficients(phase 2)
+       punpckldq mm0,mm3               ; mm0=(40 50 60 70)
+       punpckhdq mm7,mm3               ; mm7=(41 51 61 71)
+       movq      mm1,mm6               ; transpose coefficients(phase 2)
+       punpckldq mm6,mm4               ; mm6=(42 52 62 72)
+       punpckhdq mm1,mm4               ; mm1=(43 53 63 73)
+
+       movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7
+       movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6
+       movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1
+
+.nextcolumn:
+       add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+       add     edx, byte 4*SIZEOF_ISLOW_MULT_TYPE      ; quantptr
+       add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+       dec     ecx                                     ; ctr
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, [original_ebp]
+       lea     esi, [workspace]                        ; JCOEF * wsptr
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.rowloop:
+
+       ; -- Even part
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+       ; (Original)
+       ; z1 = (z2 + z3) * 0.541196100;
+       ; tmp2 = z1 + z3 * -1.847759065;
+       ; tmp3 = z1 + z2 * 0.765366865;
+       ;
+       ; (This implementation)
+       ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+       ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+       movq      mm4,mm1               ; mm1=in2=z2
+       movq      mm5,mm1
+       punpcklwd mm4,mm3               ; mm3=in6=z3
+       punpckhwd mm5,mm3
+       movq      mm1,mm4
+       movq      mm3,mm5
+       pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=tmp3L
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]        ; mm5=tmp3H
+       pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=tmp2L
+       pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]       ; mm3=tmp2H
+
+       movq      mm6,mm0
+       paddw     mm0,mm2               ; mm0=in0+in4
+       psubw     mm6,mm2               ; mm6=in0-in4
+
+       pxor      mm7,mm7
+       pxor      mm2,mm2
+       punpcklwd mm7,mm0               ; mm7=tmp0L
+       punpckhwd mm2,mm0               ; mm2=tmp0H
+       psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+       psrad     mm2,(16-CONST_BITS)   ; psrad mm2,16 & pslld mm2,CONST_BITS
+
+       movq    mm0,mm7
+       paddd   mm7,mm4                 ; mm7=tmp10L
+       psubd   mm0,mm4                 ; mm0=tmp13L
+       movq    mm4,mm2
+       paddd   mm2,mm5                 ; mm2=tmp10H
+       psubd   mm4,mm5                 ; mm4=tmp13H
+
+       movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp10L
+       movq    MMWORD [wk(1)], mm2     ; wk(1)=tmp10H
+       movq    MMWORD [wk(2)], mm0     ; wk(2)=tmp13L
+       movq    MMWORD [wk(3)], mm4     ; wk(3)=tmp13H
+
+       pxor      mm5,mm5
+       pxor      mm7,mm7
+       punpcklwd mm5,mm6               ; mm5=tmp1L
+       punpckhwd mm7,mm6               ; mm7=tmp1H
+       psrad     mm5,(16-CONST_BITS)   ; psrad mm5,16 & pslld mm5,CONST_BITS
+       psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+
+       movq    mm2,mm5
+       paddd   mm5,mm1                 ; mm5=tmp11L
+       psubd   mm2,mm1                 ; mm2=tmp12L
+       movq    mm0,mm7
+       paddd   mm7,mm3                 ; mm7=tmp11H
+       psubd   mm0,mm3                 ; mm0=tmp12H
+
+       movq    MMWORD [wk(4)], mm5     ; wk(4)=tmp11L
+       movq    MMWORD [wk(5)], mm7     ; wk(5)=tmp11H
+       movq    MMWORD [wk(6)], mm2     ; wk(6)=tmp12L
+       movq    MMWORD [wk(7)], mm0     ; wk(7)=tmp12H
+
+       ; -- Odd part
+
+       movq    mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+       movq    mm5,mm6
+       movq    mm7,mm4
+       paddw   mm5,mm3                 ; mm5=z3
+       paddw   mm7,mm1                 ; mm7=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movq      mm2,mm5
+       movq      mm0,mm5
+       punpcklwd mm2,mm7
+       punpckhwd mm0,mm7
+       movq      mm5,mm2
+       movq      mm7,mm0
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]       ; mm2=z3L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]       ; mm0=z3H
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]        ; mm5=z4L
+       pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]        ; mm7=z4H
+
+       movq    MMWORD [wk(10)], mm2    ; wk(10)=z3L
+       movq    MMWORD [wk(11)], mm0    ; wk(11)=z3H
+
+       ; (Original)
+       ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+       ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+       ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+       ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+       ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+       ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+       ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+       ; tmp0 += z3;  tmp1 += z4;
+       ; tmp2 += z3;  tmp3 += z4;
+
+       movq      mm2,mm3
+       movq      mm0,mm3
+       punpcklwd mm2,mm4
+       punpckhwd mm0,mm4
+       movq      mm3,mm2
+       movq      mm4,mm0
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm2=tmp0L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm0=tmp0H
+       pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]       ; mm3=tmp3L
+       pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]       ; mm4=tmp3H
+
+       paddd   mm2, MMWORD [wk(10)]    ; mm2=tmp0L
+       paddd   mm0, MMWORD [wk(11)]    ; mm0=tmp0H
+       paddd   mm3,mm5                 ; mm3=tmp3L
+       paddd   mm4,mm7                 ; mm4=tmp3H
+
+       movq    MMWORD [wk(8)], mm2     ; wk(8)=tmp0L
+       movq    MMWORD [wk(9)], mm0     ; wk(9)=tmp0H
+
+       movq      mm2,mm1
+       movq      mm0,mm1
+       punpcklwd mm2,mm6
+       punpckhwd mm0,mm6
+       movq      mm1,mm2
+       movq      mm6,mm0
+       pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm2=tmp1L
+       pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm0=tmp1H
+       pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]       ; mm1=tmp2L
+       pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]       ; mm6=tmp2H
+
+       paddd   mm2,mm5                 ; mm2=tmp1L
+       paddd   mm0,mm7                 ; mm0=tmp1H
+       paddd   mm1, MMWORD [wk(10)]    ; mm1=tmp2L
+       paddd   mm6, MMWORD [wk(11)]    ; mm6=tmp2H
+
+       movq    MMWORD [wk(10)], mm2    ; wk(10)=tmp1L
+       movq    MMWORD [wk(11)], mm0    ; wk(11)=tmp1H
+
+       ; -- Final output stage
+
+       movq    mm5, MMWORD [wk(0)]     ; mm5=tmp10L
+       movq    mm7, MMWORD [wk(1)]     ; mm7=tmp10H
+
+       movq    mm2,mm5
+       movq    mm0,mm7
+       paddd   mm5,mm3                 ; mm5=data0L
+       paddd   mm7,mm4                 ; mm7=data0H
+       psubd   mm2,mm3                 ; mm2=data7L
+       psubd   mm0,mm4                 ; mm0=data7H
+
+       movq    mm3,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2]
+
+       paddd   mm5,mm3
+       paddd   mm7,mm3
+       psrad   mm5,DESCALE_P2
+       psrad   mm7,DESCALE_P2
+       paddd   mm2,mm3
+       paddd   mm0,mm3
+       psrad   mm2,DESCALE_P2
+       psrad   mm0,DESCALE_P2
+
+       packssdw  mm5,mm7               ; mm5=data0=(00 10 20 30)
+       packssdw  mm2,mm0               ; mm2=data7=(07 17 27 37)
+
+       movq    mm4, MMWORD [wk(4)]     ; mm4=tmp11L
+       movq    mm3, MMWORD [wk(5)]     ; mm3=tmp11H
+
+       movq    mm7,mm4
+       movq    mm0,mm3
+       paddd   mm4,mm1                 ; mm4=data1L
+       paddd   mm3,mm6                 ; mm3=data1H
+       psubd   mm7,mm1                 ; mm7=data6L
+       psubd   mm0,mm6                 ; mm0=data6H
+
+       movq    mm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2]
+
+       paddd   mm4,mm1
+       paddd   mm3,mm1
+       psrad   mm4,DESCALE_P2
+       psrad   mm3,DESCALE_P2
+       paddd   mm7,mm1
+       paddd   mm0,mm1
+       psrad   mm7,DESCALE_P2
+       psrad   mm0,DESCALE_P2
+
+       packssdw  mm4,mm3               ; mm4=data1=(01 11 21 31)
+       packssdw  mm7,mm0               ; mm7=data6=(06 16 26 36)
+
+       packsswb  mm5,mm7               ; mm5=(00 10 20 30 06 16 26 36)
+       packsswb  mm4,mm2               ; mm4=(01 11 21 31 07 17 27 37)
+
+       movq    mm6, MMWORD [wk(6)]     ; mm6=tmp12L
+       movq    mm1, MMWORD [wk(7)]     ; mm1=tmp12H
+       movq    mm3, MMWORD [wk(10)]    ; mm3=tmp1L
+       movq    mm0, MMWORD [wk(11)]    ; mm0=tmp1H
+
+       movq    MMWORD [wk(0)], mm5     ; wk(0)=(00 10 20 30 06 16 26 36)
+       movq    MMWORD [wk(1)], mm4     ; wk(1)=(01 11 21 31 07 17 27 37)
+
+       movq    mm7,mm6
+       movq    mm2,mm1
+       paddd   mm6,mm3                 ; mm6=data2L
+       paddd   mm1,mm0                 ; mm1=data2H
+       psubd   mm7,mm3                 ; mm7=data5L
+       psubd   mm2,mm0                 ; mm2=data5H
+
+       movq    mm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2]
+
+       paddd   mm6,mm5
+       paddd   mm1,mm5
+       psrad   mm6,DESCALE_P2
+       psrad   mm1,DESCALE_P2
+       paddd   mm7,mm5
+       paddd   mm2,mm5
+       psrad   mm7,DESCALE_P2
+       psrad   mm2,DESCALE_P2
+
+       packssdw  mm6,mm1               ; mm6=data2=(02 12 22 32)
+       packssdw  mm7,mm2               ; mm7=data5=(05 15 25 35)
+
+       movq    mm4, MMWORD [wk(2)]     ; mm4=tmp13L
+       movq    mm3, MMWORD [wk(3)]     ; mm3=tmp13H
+       movq    mm0, MMWORD [wk(8)]     ; mm0=tmp0L
+       movq    mm5, MMWORD [wk(9)]     ; mm5=tmp0H
+
+       movq    mm1,mm4
+       movq    mm2,mm3
+       paddd   mm4,mm0                 ; mm4=data3L
+       paddd   mm3,mm5                 ; mm3=data3H
+       psubd   mm1,mm0                 ; mm1=data4L
+       psubd   mm2,mm5                 ; mm2=data4H
+
+       movq    mm0,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2]
+
+       paddd   mm4,mm0
+       paddd   mm3,mm0
+       psrad   mm4,DESCALE_P2
+       psrad   mm3,DESCALE_P2
+       paddd   mm1,mm0
+       paddd   mm2,mm0
+       psrad   mm1,DESCALE_P2
+       psrad   mm2,DESCALE_P2
+
+       movq      mm5,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm5=[PB_CENTERJSAMP]
+
+       packssdw  mm4,mm3               ; mm4=data3=(03 13 23 33)
+       packssdw  mm1,mm2               ; mm1=data4=(04 14 24 34)
+
+       movq      mm0, MMWORD [wk(0)]   ; mm0=(00 10 20 30 06 16 26 36)
+       movq      mm3, MMWORD [wk(1)]   ; mm3=(01 11 21 31 07 17 27 37)
+
+       packsswb  mm6,mm1               ; mm6=(02 12 22 32 04 14 24 34)
+       packsswb  mm4,mm7               ; mm4=(03 13 23 33 05 15 25 35)
+
+       paddb     mm0,mm5
+       paddb     mm3,mm5
+       paddb     mm6,mm5
+       paddb     mm4,mm5
+
+       movq      mm2,mm0               ; transpose coefficients(phase 1)
+       punpcklbw mm0,mm3               ; mm0=(00 01 10 11 20 21 30 31)
+       punpckhbw mm2,mm3               ; mm2=(06 07 16 17 26 27 36 37)
+       movq      mm1,mm6               ; transpose coefficients(phase 1)
+       punpcklbw mm6,mm4               ; mm6=(02 03 12 13 22 23 32 33)
+       punpckhbw mm1,mm4               ; mm1=(04 05 14 15 24 25 34 35)
+
+       movq      mm7,mm0               ; transpose coefficients(phase 2)
+       punpcklwd mm0,mm6               ; mm0=(00 01 02 03 10 11 12 13)
+       punpckhwd mm7,mm6               ; mm7=(20 21 22 23 30 31 32 33)
+       movq      mm5,mm1               ; transpose coefficients(phase 2)
+       punpcklwd mm1,mm2               ; mm1=(04 05 06 07 14 15 16 17)
+       punpckhwd mm5,mm2               ; mm5=(24 25 26 27 34 35 36 37)
+
+       movq      mm3,mm0               ; transpose coefficients(phase 3)
+       punpckldq mm0,mm1               ; mm0=(00 01 02 03 04 05 06 07)
+       punpckhdq mm3,mm1               ; mm3=(10 11 12 13 14 15 16 17)
+       movq      mm4,mm7               ; transpose coefficients(phase 3)
+       punpckldq mm7,mm5               ; mm7=(20 21 22 23 24 25 26 27)
+       punpckhdq mm4,mm5               ; mm4=(30 31 32 33 34 35 36 37)
+
+       pushpic ebx                     ; save GOT address
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
+       movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3
+       mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
+       movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
+
+       poppic  ebx                     ; restore GOT address
+
+       add     esi, byte 4*SIZEOF_JCOEF        ; wsptr
+       add     edi, byte 4*SIZEOF_JSAMPROW
+       dec     ecx                             ; ctr
+       jnz     near .rowloop
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JIDCT_INT_MMX_SUPPORTED
+%endif ; DCT_ISLOW_SUPPORTED
diff --git a/jimmxred.asm b/jimmxred.asm
new file mode 100644 (file)
index 0000000..491fa7b
--- /dev/null
@@ -0,0 +1,719 @@
+;
+; jimmxred.asm - reduced-size IDCT (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size
+; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef IDCT_SCALING_SUPPORTED
+%ifdef JIDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     13              ; fixed-point scale: FIX(x) = round(x * 2^CONST_BITS)
+%define PASS1_BITS     2               ; left-shift applied to pass-1 results in the DC-only column path (psllw)
+
+%define DESCALE_P1_4   (CONST_BITS-PASS1_BITS+1)       ; = 12 here; NOTE(review): presumably the pass-1 shift of the 4x4 IDCT -- confirm at use site
+%define DESCALE_P2_4   (CONST_BITS+PASS1_BITS+3+1)     ; = 19 here; NOTE(review): presumably the pass-2 shift of the 4x4 IDCT -- confirm at use site
+%define DESCALE_P1_2   (CONST_BITS-PASS1_BITS+2)       ; = 13 here; NOTE(review): presumably the pass-1 shift of the 2x2 IDCT -- confirm at use site
+%define DESCALE_P2_2   (CONST_BITS+PASS1_BITS+3+2)     ; = 20 here; NOTE(review): presumably the pass-2 shift of the 2x2 IDCT -- confirm at use site
+
+%if CONST_BITS == 13
+F_0_211        equ      1730           ; FIX(0.211164243)
+F_0_509        equ      4176           ; FIX(0.509795579)
+F_0_601        equ      4926           ; FIX(0.601344887)
+F_0_720        equ      5906           ; FIX(0.720959822)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_850        equ      6967           ; FIX(0.850430095)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_061        equ      8697           ; FIX(1.061594337)
+F_1_272        equ     10426           ; FIX(1.272758580)
+F_1_451        equ     11893           ; FIX(1.451774981)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_2_172        equ     17799           ; FIX(2.172734803)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_624        equ     29692           ; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value below is written as round(x * 2^30) and rescaled to CONST_BITS.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; x / 2^n, rounded to nearest
+F_0_211        equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509        equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601        equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720        equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850        equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061        equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272        equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451        equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172        equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624        equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_idct_red_mmx)
+
+EXTN(jconst_idct_red_mmx):             ; exported label marking the start of this module's constant table
+
+PW_F184_MF076  times 2 dw  F_1_847,-F_0_765    ; PW_<a>_<b>: word pair (a,b) stored twice = 8 bytes; "MF" marks a negated constant
+PW_F256_F089   times 2 dw  F_2_562, F_0_899    ; pmaddwd operand in the 4x4 pass-1 odd part
+PW_F106_MF217  times 2 dw  F_1_061,-F_2_172    ; pmaddwd operand in the 4x4 pass-1 odd part
+PW_MF060_MF050 times 2 dw -F_0_601,-F_0_509    ; pmaddwd operand in the 4x4 pass-1 odd part
+PW_F145_MF021  times 2 dw  F_1_451,-F_0_211    ; pmaddwd operand in the 4x4 pass-1 odd part
+PW_F362_MF127  times 2 dw  F_3_624,-F_1_272
+PW_F085_MF072  times 2 dw  F_0_850,-F_0_720
+PD_DESCALE_P1_4        times 2 dd  1 << (DESCALE_P1_4-1)       ; two dwords of 2^(n-1); NOTE(review): presumably the rounding bias added before a right shift by n -- confirm
+PD_DESCALE_P2_4        times 2 dd  1 << (DESCALE_P2_4-1)
+PD_DESCALE_P1_2        times 2 dd  1 << (DESCALE_P1_2-1)
+PD_DESCALE_P2_2        times 2 dd  1 << (DESCALE_P2_2-1)
+PB_CENTERJSAMP times 8 db  CENTERJSAMPLE       ; CENTERJSAMPLE replicated into all 8 bytes of an mmword
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 4x4 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_4x4_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                    JCOEFPTR coef_block,
+;                    JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo   (b = esp as it was right after "push ebp")
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define original_ebp   ebp+0           ; slot at the aligned ebp where the prologue stores the pre-alignment esp
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM], directly below the aligned ebp
+%define WK_NUM         2               ; number of wk slots; defined after wk(i), fine because %define bodies expand at use
+%define workspace      wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                       ; JCOEF workspace[DCTSIZE2], directly below wk(0)
+
+       align   16
+       global  EXTN(jpeg_idct_4x4_mmx)
+
+EXTN(jpeg_idct_4x4_mmx):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [workspace]
+       pushpic ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+;      mov     eax, [original_ebp]
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+       lea     edi, [workspace]                        ; JCOEF * wsptr
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     short .columnDCT
+
+       movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     mm0,mm1
+       packsswb mm0,mm0
+       movd    eax,mm0
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       psllw   mm0,PASS1_BITS
+
+       movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+       punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+       punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
+
+       movq      mm1,mm0
+       punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+       punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+       movq      mm3,mm2
+       punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+       punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+       movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+       jmp     near .nextcolumn
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Odd part
+
+       movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       movq      mm4,mm0
+       movq      mm5,mm0
+       punpcklwd mm4,mm1
+       punpckhwd mm5,mm1
+       movq      mm0,mm4
+       movq      mm1,mm5
+       pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]        ; mm4=(tmp2L)
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]        ; mm5=(tmp2H)
+       pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]       ; mm0=(tmp0L)
+       pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]       ; mm1=(tmp0H)
+
+       movq      mm6,mm2
+       movq      mm7,mm2
+       punpcklwd mm6,mm3
+       punpckhwd mm7,mm3
+       movq      mm2,mm6
+       movq      mm3,mm7
+       pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm6=(tmp2L)
+       pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm7=(tmp2H)
+       pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]       ; mm2=(tmp0L)
+       pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]       ; mm3=(tmp0H)
+
+       paddd   mm6,mm4                 ; mm6=tmp2L
+       paddd   mm7,mm5                 ; mm7=tmp2H
+       paddd   mm2,mm0                 ; mm2=tmp0L
+       paddd   mm3,mm1                 ; mm3=tmp0H
+
+       movq    MMWORD [wk(0)], mm2     ; wk(0)=tmp0L
+       movq    MMWORD [wk(1)], mm3     ; wk(1)=tmp0H
+
+       ; -- Even part
+
+       movq    mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq    mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movq    mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       pxor      mm1,mm1
+       pxor      mm2,mm2
+       punpcklwd mm1,mm4               ; mm1=tmp0L
+       punpckhwd mm2,mm4               ; mm2=tmp0H
+       psrad     mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1
+       psrad     mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1
+
+       movq      mm3,mm5               ; mm5=in2=z2
+       punpcklwd mm5,mm0               ; mm0=in6=z3
+       punpckhwd mm3,mm0
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]       ; mm5=tmp2L
+       pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]       ; mm3=tmp2H
+
+       movq    mm4,mm1
+       movq    mm0,mm2
+       paddd   mm1,mm5                 ; mm1=tmp10L
+       paddd   mm2,mm3                 ; mm2=tmp10H
+       psubd   mm4,mm5                 ; mm4=tmp12L
+       psubd   mm0,mm3                 ; mm0=tmp12H
+
+       ; -- Final output stage
+
+       movq    mm5,mm1
+       movq    mm3,mm2
+       paddd   mm1,mm6                 ; mm1=data0L
+       paddd   mm2,mm7                 ; mm2=data0H
+       psubd   mm5,mm6                 ; mm5=data3L
+       psubd   mm3,mm7                 ; mm3=data3H
+
+       movq    mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]       ; mm6=[PD_DESCALE_P1_4]
+
+       paddd   mm1,mm6
+       paddd   mm2,mm6
+       psrad   mm1,DESCALE_P1_4
+       psrad   mm2,DESCALE_P1_4
+       paddd   mm5,mm6
+       paddd   mm3,mm6
+       psrad   mm5,DESCALE_P1_4
+       psrad   mm3,DESCALE_P1_4
+
+       packssdw  mm1,mm2               ; mm1=data0=(00 01 02 03)
+       packssdw  mm5,mm3               ; mm5=data3=(30 31 32 33)
+
+       movq    mm7, MMWORD [wk(0)]     ; mm7=tmp0L
+       movq    mm6, MMWORD [wk(1)]     ; mm6=tmp0H
+
+       movq    mm2,mm4
+       movq    mm3,mm0
+       paddd   mm4,mm7                 ; mm4=data1L
+       paddd   mm0,mm6                 ; mm0=data1H
+       psubd   mm2,mm7                 ; mm2=data2L
+       psubd   mm3,mm6                 ; mm3=data2H
+
+       movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]       ; mm7=[PD_DESCALE_P1_4]
+
+       paddd   mm4,mm7
+       paddd   mm0,mm7
+       psrad   mm4,DESCALE_P1_4
+       psrad   mm0,DESCALE_P1_4
+       paddd   mm2,mm7
+       paddd   mm3,mm7
+       psrad   mm2,DESCALE_P1_4
+       psrad   mm3,DESCALE_P1_4
+
+       packssdw  mm4,mm0               ; mm4=data1=(10 11 12 13)
+       packssdw  mm2,mm3               ; mm2=data2=(20 21 22 23)
+
+       movq      mm6,mm1               ; transpose coefficients(phase 1)
+       punpcklwd mm1,mm4               ; mm1=(00 10 01 11)
+       punpckhwd mm6,mm4               ; mm6=(02 12 03 13)
+       movq      mm7,mm2               ; transpose coefficients(phase 1)
+       punpcklwd mm2,mm5               ; mm2=(20 30 21 31)
+       punpckhwd mm7,mm5               ; mm7=(22 32 23 33)
+
+       movq      mm0,mm1               ; transpose coefficients(phase 2)
+       punpckldq mm1,mm2               ; mm1=(00 10 20 30)
+       punpckhdq mm0,mm2               ; mm0=(01 11 21 31)
+       movq      mm3,mm6               ; transpose coefficients(phase 2)
+       punpckldq mm6,mm7               ; mm6=(02 12 22 32)
+       punpckhdq mm3,mm7               ; mm3=(03 13 23 33)
+
+       movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1
+       movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
+       movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6
+       movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+
+.nextcolumn:
+       add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+       add     edx, byte 4*SIZEOF_ISLOW_MULT_TYPE      ; quantptr
+       add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+       dec     ecx                                     ; ctr
+       jnz     near .columnloop
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, [original_ebp]
+       lea     esi, [workspace]                        ; JCOEF * wsptr
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]
+
+       ; -- Odd part
+
+       movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+       movq      mm4,mm0
+       movq      mm5,mm0
+       punpcklwd mm4,mm1
+       punpckhwd mm5,mm1
+       movq      mm0,mm4
+       movq      mm1,mm5
+       pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]        ; mm4=(tmp2L)
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]        ; mm5=(tmp2H)
+       pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]       ; mm0=(tmp0L)
+       pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]       ; mm1=(tmp0H)
+
+       movq      mm6,mm2
+       movq      mm7,mm2
+       punpcklwd mm6,mm3
+       punpckhwd mm7,mm3
+       movq      mm2,mm6
+       movq      mm3,mm7
+       pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm6=(tmp2L)
+       pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm7=(tmp2H)
+       pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]       ; mm2=(tmp0L)
+       pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]       ; mm3=(tmp0H)
+
+       paddd   mm6,mm4                 ; mm6=tmp2L
+       paddd   mm7,mm5                 ; mm7=tmp2H
+       paddd   mm2,mm0                 ; mm2=tmp0L
+       paddd   mm3,mm1                 ; mm3=tmp0H
+
+       movq    MMWORD [wk(0)], mm2     ; wk(0)=tmp0L
+       movq    MMWORD [wk(1)], mm3     ; wk(1)=tmp0H
+
+       ; -- Even part
+
+       movq    mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq    mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movq    mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+       pxor      mm1,mm1
+       pxor      mm2,mm2
+       punpcklwd mm1,mm4               ; mm1=tmp0L
+       punpckhwd mm2,mm4               ; mm2=tmp0H
+       psrad     mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1
+       psrad     mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1
+
+       movq      mm3,mm5               ; mm5=in2=z2
+       punpcklwd mm5,mm0               ; mm0=in6=z3
+       punpckhwd mm3,mm0
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]       ; mm5=tmp2L
+       pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]       ; mm3=tmp2H
+
+       movq    mm4,mm1
+       movq    mm0,mm2
+       paddd   mm1,mm5                 ; mm1=tmp10L
+       paddd   mm2,mm3                 ; mm2=tmp10H
+       psubd   mm4,mm5                 ; mm4=tmp12L
+       psubd   mm0,mm3                 ; mm0=tmp12H
+
+       ; -- Final output stage
+
+       movq    mm5,mm1
+       movq    mm3,mm2
+       paddd   mm1,mm6                 ; mm1=data0L
+       paddd   mm2,mm7                 ; mm2=data0H
+       psubd   mm5,mm6                 ; mm5=data3L
+       psubd   mm3,mm7                 ; mm3=data3H
+
+       movq    mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)]       ; mm6=[PD_DESCALE_P2_4]
+
+       paddd   mm1,mm6
+       paddd   mm2,mm6
+       psrad   mm1,DESCALE_P2_4
+       psrad   mm2,DESCALE_P2_4
+       paddd   mm5,mm6
+       paddd   mm3,mm6
+       psrad   mm5,DESCALE_P2_4
+       psrad   mm3,DESCALE_P2_4
+
+       packssdw  mm1,mm2               ; mm1=data0=(00 10 20 30)
+       packssdw  mm5,mm3               ; mm5=data3=(03 13 23 33)
+
+       movq    mm7, MMWORD [wk(0)]     ; mm7=tmp0L
+       movq    mm6, MMWORD [wk(1)]     ; mm6=tmp0H
+
+       movq    mm2,mm4
+       movq    mm3,mm0
+       paddd   mm4,mm7                 ; mm4=data1L
+       paddd   mm0,mm6                 ; mm0=data1H
+       psubd   mm2,mm7                 ; mm2=data2L
+       psubd   mm3,mm6                 ; mm3=data2H
+
+       movq    mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)]       ; mm7=[PD_DESCALE_P2_4]
+
+       paddd   mm4,mm7
+       paddd   mm0,mm7
+       psrad   mm4,DESCALE_P2_4
+       psrad   mm0,DESCALE_P2_4
+       paddd   mm2,mm7
+       paddd   mm3,mm7
+       psrad   mm2,DESCALE_P2_4
+       psrad   mm3,DESCALE_P2_4
+
+       packssdw  mm4,mm0               ; mm4=data1=(01 11 21 31)
+       packssdw  mm2,mm3               ; mm2=data2=(02 12 22 32)
+
+       movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm6=[PB_CENTERJSAMP]
+
+       packsswb  mm1,mm2               ; mm1=(00 10 20 30 02 12 22 32)
+       packsswb  mm4,mm5               ; mm4=(01 11 21 31 03 13 23 33)
+       paddb     mm1,mm6
+       paddb     mm4,mm6
+
+       movq      mm7,mm1               ; transpose coefficients(phase 1)
+       punpcklbw mm1,mm4               ; mm1=(00 01 10 11 20 21 30 31)
+       punpckhbw mm7,mm4               ; mm7=(02 03 12 13 22 23 32 33)
+
+       movq      mm0,mm1               ; transpose coefficients(phase 2)
+       punpcklwd mm1,mm7               ; mm1=(00 01 02 03 10 11 12 13)
+       punpckhwd mm0,mm7               ; mm0=(20 21 22 23 30 31 32 33)
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       movd    DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
+       movd    DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
+
+       psrlq   mm1,4*BYTE_BIT
+       psrlq   mm0,4*BYTE_BIT
+
+       mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movd    DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
+       movd    DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; Only the coefficients in rows 0, 1, 3, 5, 7 and columns 0, 1, 3, 5, 7
+; are used; the cells shown as "**" in the diagrams below are ignored.
+; Each coefficient is dequantized by multiplying it (pmullw) with the
+; matching entry of the component's dct_table.  The results of pass 1 are
+; kept in MMX registers, so no workspace array is needed.
+;
+; Output: two samples are written to each of output_buf[0] and
+; output_buf[1], starting at offset output_col.
+;
+; Registers: eax, ecx and edx are clobbered; ebx, esi, edi and ebp are
+; saved and restored.  The MMX state is emptied (emms) before returning.
+;
+; GLOBAL(void)
+; jpeg_idct_2x2_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                    JCOEFPTR coef_block,
+;                    JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+; Offsets of the stack arguments relative to base register b
+; (b = ebp after the "push ebp / mov ebp,esp" prologue below).
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+       align   16
+       global  EXTN(jpeg_idct_2x2_mmx)
+
+EXTN(jpeg_idct_2x2_mmx):
+       push    ebp
+       mov     ebp,esp
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input.
+
+       mov     edx, POINTER [compptr(ebp)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+
+       ; | input:                  | result:        |
+       ; | 00 01 ** 03 ** 05 ** 07 |                |
+       ; | 10 11 ** 13 ** 15 ** 17 |                |
+       ; | ** ** ** ** ** ** ** ** |                |
+       ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+       ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+       ; | 50 51 ** 53 ** 55 ** 57 |                |
+       ; | ** ** ** ** ** ** ** ** |                |
+       ; | 70 71 ** 73 ** 75 ** 77 |                |
+
+       ; -- Odd part
+
+       movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; mm0=(10 11 ** 13), mm1=(30 31 ** 33)
+       ; mm2=(50 51 ** 53), mm3=(70 71 ** 73)
+
+       ; Build a mask that keeps only the upper word of each dword; it is
+       ; used below to pick the odd-numbered columns out of each row.
+       pcmpeqd   mm7,mm7
+       pslld     mm7,WORD_BIT          ; mm7={0x0000 0xFFFF 0x0000 0xFFFF}
+
+       movq      mm4,mm0               ; mm4=(10 11 ** 13)
+       movq      mm5,mm2               ; mm5=(50 51 ** 53)
+       punpcklwd mm4,mm1               ; mm4=(10 30 11 31)
+       punpcklwd mm5,mm3               ; mm5=(50 70 51 71)
+       pmaddwd   mm4,[GOTOFF(ebx,PW_F362_MF127)]
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
+
+       psrld   mm0,WORD_BIT            ; mm0=(11 -- 13 --)
+       pand    mm1,mm7                 ; mm1=(-- 31 -- 33)
+       psrld   mm2,WORD_BIT            ; mm2=(51 -- 53 --)
+       pand    mm3,mm7                 ; mm3=(-- 71 -- 73)
+       por     mm0,mm1                 ; mm0=(11 31 13 33)
+       por     mm2,mm3                 ; mm2=(51 71 53 73)
+       pmaddwd mm0,[GOTOFF(ebx,PW_F362_MF127)]
+       pmaddwd mm2,[GOTOFF(ebx,PW_F085_MF072)]
+
+       paddd   mm4,mm5                 ; mm4=tmp0[col0 col1]
+
+       movq    mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)]
+       pmullw  mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movq    mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)]
+       movq    mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)]
+       pmullw  mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; mm6=(** 15 ** 17), mm1=(** 35 ** 37)
+       ; mm3=(** 55 ** 57), mm5=(** 75 ** 77)
+
+       psrld   mm6,WORD_BIT            ; mm6=(15 -- 17 --)
+       pand    mm1,mm7                 ; mm1=(-- 35 -- 37)
+       psrld   mm3,WORD_BIT            ; mm3=(55 -- 57 --)
+       pand    mm5,mm7                 ; mm5=(-- 75 -- 77)
+       por     mm6,mm1                 ; mm6=(15 35 17 37)
+       por     mm3,mm5                 ; mm3=(55 75 57 77)
+       pmaddwd mm6,[GOTOFF(ebx,PW_F362_MF127)]
+       pmaddwd mm3,[GOTOFF(ebx,PW_F085_MF072)]
+
+       paddd   mm0,mm2                 ; mm0=tmp0[col1 col3]
+       paddd   mm6,mm3                 ; mm6=tmp0[col5 col7]
+
+       ; -- Even part
+
+       movq    mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq    mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)]
+       pmullw  mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; mm1=(00 01 ** 03), mm5=(** 05 ** 07)
+
+       ; Only row 0 feeds the even part.  The wanted word is first placed
+       ; in the upper half of its dword (pslld for column 0, pand for the
+       ; odd columns); the arithmetic right shift by
+       ; (WORD_BIT-CONST_BITS-2) then leaves it sign-extended and scaled
+       ; up by CONST_BITS+2 bits.
+       movq    mm2,mm1                         ; mm2=(00 01 ** 03)
+       pslld   mm1,WORD_BIT                    ; mm1=(-- 00 -- **)
+       psrad   mm1,(WORD_BIT-CONST_BITS-2)     ; mm1=tmp10[col0 ****]
+
+       pand    mm2,mm7                         ; mm2=(-- 01 -- 03)
+       pand    mm5,mm7                         ; mm5=(-- 05 -- 07)
+       psrad   mm2,(WORD_BIT-CONST_BITS-2)     ; mm2=tmp10[col1 col3]
+       psrad   mm5,(WORD_BIT-CONST_BITS-2)     ; mm5=tmp10[col5 col7]
+
+       ; -- Final output stage
+
+       movq      mm3,mm1
+       paddd     mm1,mm4               ; mm1=data0[col0 ****]=(A0 **)
+       psubd     mm3,mm4               ; mm3=data1[col0 ****]=(B0 **)
+       punpckldq mm1,mm3               ; mm1=(A0 B0)
+
+       movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)]       ; mm7=[PD_DESCALE_P1_2]
+
+       movq    mm4,mm2
+       movq    mm3,mm5
+       paddd   mm2,mm0                 ; mm2=data0[col1 col3]=(A1 A3)
+       paddd   mm5,mm6                 ; mm5=data0[col5 col7]=(A5 A7)
+       psubd   mm4,mm0                 ; mm4=data1[col1 col3]=(B1 B3)
+       psubd   mm3,mm6                 ; mm3=data1[col5 col7]=(B5 B7)
+
+       ; Descale: add the rounding constant, then shift right arithmetically.
+       paddd   mm1,mm7
+       psrad   mm1,DESCALE_P1_2
+
+       paddd   mm2,mm7
+       paddd   mm5,mm7
+       psrad   mm2,DESCALE_P1_2
+       psrad   mm5,DESCALE_P1_2
+       paddd   mm4,mm7
+       paddd   mm3,mm7
+       psrad   mm4,DESCALE_P1_2
+       psrad   mm3,DESCALE_P1_2
+
+       ; ---- Pass 2: process rows, store into output array.
+
+       ; Inputs are still in registers from pass 1:
+       ; mm1=(A0 B0), mm2=(A1 A3), mm4=(B1 B3), mm5=(A5 A7), mm3=(B5 B7)
+
+       mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(ebp)]
+
+       ; | input:| result:|
+       ; | A0 B0 |        |
+       ; | A1 B1 | C0 C1  |
+       ; | A3 B3 | D0 D1  |
+       ; | A5 B5 |        |
+       ; | A7 B7 |        |
+
+       ; -- Odd part
+
+       packssdw  mm2,mm4               ; mm2=(A1 A3 B1 B3)
+       packssdw  mm5,mm3               ; mm5=(A5 A7 B5 B7)
+       pmaddwd   mm2,[GOTOFF(ebx,PW_F362_MF127)]
+       pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
+
+       paddd     mm2,mm5               ; mm2=tmp0[row0 row1]
+
+       ; -- Even part
+
+       pslld     mm1,(CONST_BITS+2)    ; mm1=tmp10[row0 row1]
+
+       ; -- Final output stage
+
+       movq      mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)]     ; mm0=[PD_DESCALE_P2_2]
+
+       movq      mm6,mm1
+       paddd     mm1,mm2               ; mm1=data0[row0 row1]=(C0 C1)
+       psubd     mm6,mm2               ; mm6=data1[row0 row1]=(D0 D1)
+
+       paddd     mm1,mm0
+       paddd     mm6,mm0
+       psrad     mm1,DESCALE_P2_2
+       psrad     mm6,DESCALE_P2_2
+
+       movq      mm7,mm1               ; transpose coefficients
+       punpckldq mm1,mm6               ; mm1=(C0 D0)
+       punpckhdq mm7,mm6               ; mm7=(C1 D1)
+
+       ; Saturate down to signed bytes, then add the PB_CENTERJSAMP
+       ; constant to every byte.
+       packssdw  mm1,mm7               ; mm1=(C0 D0 C1 D1)
+       packsswb  mm1,mm1               ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1)
+       paddb     mm1,[GOTOFF(ebx,PB_CENTERJSAMP)]
+
+       ; The paddb above is the last GOTOFF(ebx,...) reference, so ebx
+       ; (the GOT address) can now be overwritten and used as scratch.
+       ; The caller's ebx is restored by "pop ebx" in the epilogue.
+       movd    ecx,mm1
+       movd    ebx,mm1                 ; ebx=(C0 D0 C1 D1)
+       shr     ecx,2*BYTE_BIT          ; ecx=(C1 D1 -- --)
+
+       ; Each WORD store writes one byte pair: (C0 D0) from bx goes to
+       ; output_buf[0] and (C1 D1) from cx goes to output_buf[1], both at
+       ; offset output_col.
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       mov     WORD [edx+eax*SIZEOF_JSAMPLE], bx
+       mov     WORD [esi+eax*SIZEOF_JSAMPLE], cx
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; JIDCT_INT_MMX_SUPPORTED
+%endif ; IDCT_SCALING_SUPPORTED
diff --git a/jiss2flt.asm b/jiss2flt.asm
new file mode 100644 (file)
index 0000000..c0565a3
--- /dev/null
@@ -0,0 +1,508 @@
+;
+; jiss2flt.asm - floating-point IDCT (SSE & SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; unpcklps2 dst,src : combine the low halves of two 4-float vectors.
+; SHUFPS imm8 0x44 = 01_00_01_00b: result elements 0,1 come from elements
+; 0,1 of %1, and result elements 2,3 come from elements 0,1 of %2.
+%macro unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+       shufps  %1,%2,0x44
+%endmacro
+
+; unpckhps2 dst,src : combine the high halves of two 4-float vectors.
+; SHUFPS imm8 0xEE = 11_10_11_10b: result elements 0,1 come from elements
+; 2,3 of %1, and result elements 2,3 come from elements 2,3 of %2.
+%macro unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+       shufps  %1,%2,0xEE
+%endmacro
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_idct_float_sse2)
+
+; Constant table for the floating-point IDCT.  Every entry fills one
+; 16-byte vector: 4 identical single-precision floats, or 16 identical
+; bytes for PB_CENTERJSAMP.
+EXTN(jconst_idct_float_sse2):
+
+PD_1_414       times 4 dd  1.414213562373095048801689  ; sqrt(2)
+PD_1_847       times 4 dd  1.847759065022573512256366  ; 2*cos(pi/8)
+PD_1_082       times 4 dd  1.082392200292393968799446  ; 2*(cos(pi/8)-cos(3*pi/8))
+PD_M2_613      times 4 dd -2.613125929752753055713286  ; -2*(cos(pi/8)+cos(3*pi/8))
+PD_RNDINT_MAGIC        times 4 dd  100663296.0 ; (float)(0x00C00000 << 3)
+PB_CENTERJSAMP times 16 db CENTERJSAMPLE
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_float_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                       JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         2
+%define workspace      wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                       ; FAST_FLOAT workspace[DCTSIZE2]
+
+       align   16
+       global  EXTN(jpeg_idct_float_sse2)
+
+EXTN(jpeg_idct_float_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [workspace]
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input, store into work array.
+
+;      mov     eax, [original_ebp]
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+       lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     near .columnDCT
+
+       movq    xmm1, _MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    xmm2, _MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movq    xmm3, _MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       movq    xmm4, _MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movq    xmm5, _MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq    xmm6, _MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       movq    xmm7, _MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     xmm1,xmm2
+       por     xmm3,xmm4
+       por     xmm5,xmm6
+       por     xmm1,xmm3
+       por     xmm5,xmm7
+       por     xmm1,xmm5
+       packsswb xmm1,xmm1
+       movd    eax,xmm1
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       movq      xmm0, _MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+
+       punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+       psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+       cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+
+       mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movaps  xmm1,xmm0
+       movaps  xmm2,xmm0
+       movaps  xmm3,xmm0
+
+       shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+       shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+       shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+       shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
+
+       movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+       movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+       movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+       movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+       movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+       movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+       movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+       movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+       jmp     near .nextcolumn
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Even part
+
+       movq      xmm0, _MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq      xmm1, _MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movq      xmm2, _MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movq      xmm3, _MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+       punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+       punpcklwd xmm1,xmm1             ; xmm1=(20 20 21 21 22 22 23 23)
+       psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+       psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in2=(20 21 22 23)
+       cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+       cvtdq2ps  xmm1,xmm1                     ; xmm1=in2=(20 21 22 23)
+
+       punpcklwd xmm2,xmm2             ; xmm2=(40 40 41 41 42 42 43 43)
+       punpcklwd xmm3,xmm3             ; xmm3=(60 60 61 61 62 62 63 63)
+       psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in4=(40 41 42 43)
+       psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in6=(60 61 62 63)
+       cvtdq2ps  xmm2,xmm2                     ; xmm2=in4=(40 41 42 43)
+       cvtdq2ps  xmm3,xmm3                     ; xmm3=in6=(60 61 62 63)
+
+       mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movaps  xmm4,xmm0
+       movaps  xmm5,xmm1
+       subps   xmm0,xmm2               ; xmm0=tmp11
+       subps   xmm1,xmm3
+       addps   xmm4,xmm2               ; xmm4=tmp10
+       addps   xmm5,xmm3               ; xmm5=tmp13
+
+       mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+       subps   xmm1,xmm5               ; xmm1=tmp12
+
+       movaps  xmm6,xmm4
+       movaps  xmm7,xmm0
+       subps   xmm4,xmm5               ; xmm4=tmp3
+       subps   xmm0,xmm1               ; xmm0=tmp2
+       addps   xmm6,xmm5               ; xmm6=tmp0
+       addps   xmm7,xmm1               ; xmm7=tmp1
+
+       movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+       movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+       ; -- Odd part
+
+       movq      xmm2, _MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq      xmm3, _MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       movq      xmm5, _MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq      xmm1, _MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+       punpcklwd xmm2,xmm2             ; xmm2=(10 10 11 11 12 12 13 13)
+       punpcklwd xmm3,xmm3             ; xmm3=(30 30 31 31 32 32 33 33)
+       psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in1=(10 11 12 13)
+       psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in3=(30 31 32 33)
+       cvtdq2ps  xmm2,xmm2                     ; xmm2=in1=(10 11 12 13)
+       cvtdq2ps  xmm3,xmm3                     ; xmm3=in3=(30 31 32 33)
+
+       punpcklwd xmm5,xmm5             ; xmm5=(50 50 51 51 52 52 53 53)
+       punpcklwd xmm1,xmm1             ; xmm1=(70 70 71 71 72 72 73 73)
+       psrad     xmm5,(DWORD_BIT-WORD_BIT)     ; xmm5=in5=(50 51 52 53)
+       psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in7=(70 71 72 73)
+       cvtdq2ps  xmm5,xmm5                     ; xmm5=in5=(50 51 52 53)
+       cvtdq2ps  xmm1,xmm1                     ; xmm1=in7=(70 71 72 73)
+
+       mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movaps  xmm4,xmm2
+       movaps  xmm0,xmm5
+       addps   xmm2,xmm1               ; xmm2=z11
+       addps   xmm5,xmm3               ; xmm5=z13
+       subps   xmm4,xmm1               ; xmm4=z12
+       subps   xmm0,xmm3               ; xmm0=z10
+
+       movaps  xmm1,xmm2
+       subps   xmm2,xmm5
+       addps   xmm1,xmm5               ; xmm1=tmp7
+
+       mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+       movaps  xmm3,xmm0
+       addps   xmm0,xmm4
+       mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+       mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+       mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+       addps   xmm3,xmm0               ; xmm3=tmp12
+       subps   xmm4,xmm0               ; xmm4=tmp10
+
+       ; -- Final output stage
+
+       subps   xmm3,xmm1               ; xmm3=tmp6
+       movaps  xmm5,xmm6
+       movaps  xmm0,xmm7
+       addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+       addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+       subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+       subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+       subps   xmm2,xmm3               ; xmm2=tmp5
+
+       movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+       unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+       unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+       movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+       unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+       unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
+
+       movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+       movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
+
+       movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+       movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
+
+       addps   xmm4,xmm2               ; xmm4=tmp4
+       movaps  xmm0,xmm7
+       movaps  xmm3,xmm5
+       addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+       addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+       subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+       subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
+
+       movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+       unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+       unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+       movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+       unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+       unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
+
+       movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+       unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+       unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+       movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+       unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+       unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
+
+       movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+       movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
+
+       movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
+       movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+       movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+       movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+
+       movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+       unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+       unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+       movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+       unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+       unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
+
+       movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
+       movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+       movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
+       movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+
+.nextcolumn:
+       add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+       add     edx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+       add     edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+       dec     ecx                                     ; ctr
+       jnz     near .columnloop
+
+       ; -- Prefetch the next coefficient block
+
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, [original_ebp]
+       lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.rowloop:
+
+       ; -- Even part
+
+       movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+       movaps  xmm4,xmm0
+       movaps  xmm5,xmm1
+       subps   xmm0,xmm2               ; xmm0=tmp11
+       subps   xmm1,xmm3
+       addps   xmm4,xmm2               ; xmm4=tmp10
+       addps   xmm5,xmm3               ; xmm5=tmp13
+
+       mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+       subps   xmm1,xmm5               ; xmm1=tmp12
+
+       movaps  xmm6,xmm4
+       movaps  xmm7,xmm0
+       subps   xmm4,xmm5               ; xmm4=tmp3
+       subps   xmm0,xmm1               ; xmm0=tmp2
+       addps   xmm6,xmm5               ; xmm6=tmp0
+       addps   xmm7,xmm1               ; xmm7=tmp1
+
+       movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+       movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+       ; -- Odd part
+
+       movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+       movaps  xmm4,xmm2
+       movaps  xmm0,xmm5
+       addps   xmm2,xmm1               ; xmm2=z11
+       addps   xmm5,xmm3               ; xmm5=z13
+       subps   xmm4,xmm1               ; xmm4=z12
+       subps   xmm0,xmm3               ; xmm0=z10
+
+       movaps  xmm1,xmm2
+       subps   xmm2,xmm5
+       addps   xmm1,xmm5               ; xmm1=tmp7
+
+       mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+       movaps  xmm3,xmm0
+       addps   xmm0,xmm4
+       mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+       mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+       mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+       addps   xmm3,xmm0               ; xmm3=tmp12
+       subps   xmm4,xmm0               ; xmm4=tmp10
+
+       ; -- Final output stage
+
+       subps   xmm3,xmm1               ; xmm3=tmp6
+       movaps  xmm5,xmm6
+       movaps  xmm0,xmm7
+       addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+       addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+       subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+       subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+       subps   xmm2,xmm3               ; xmm2=tmp5
+
+       movaps  xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]      ; xmm1=[PD_RNDINT_MAGIC]
+       pcmpeqd xmm3,xmm3
+       psrld   xmm3,WORD_BIT           ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+       addps   xmm6,xmm1       ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
+       addps   xmm7,xmm1       ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
+       addps   xmm0,xmm1       ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
+       addps   xmm5,xmm1       ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
+
+       pand    xmm6,xmm3               ; xmm6=(00 -- 10 -- 20 -- 30 --)
+       pslld   xmm7,WORD_BIT           ; xmm7=(-- 01 -- 11 -- 21 -- 31)
+       pand    xmm0,xmm3               ; xmm0=(06 -- 16 -- 26 -- 36 --)
+       pslld   xmm5,WORD_BIT           ; xmm5=(-- 07 -- 17 -- 27 -- 37)
+       por     xmm6,xmm7               ; xmm6=(00 01 10 11 20 21 30 31)
+       por     xmm0,xmm5               ; xmm0=(06 07 16 17 26 27 36 37)
+
+       movaps  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp2
+       movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=tmp3
+
+       addps   xmm4,xmm2               ; xmm4=tmp4
+       movaps  xmm7,xmm1
+       movaps  xmm5,xmm3
+       addps   xmm1,xmm2               ; xmm1=data2=(02 12 22 32)
+       addps   xmm3,xmm4               ; xmm3=data4=(04 14 24 34)
+       subps   xmm7,xmm2               ; xmm7=data5=(05 15 25 35)
+       subps   xmm5,xmm4               ; xmm5=data3=(03 13 23 33)
+
+       movaps  xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]      ; xmm2=[PD_RNDINT_MAGIC]
+       pcmpeqd xmm4,xmm4
+       psrld   xmm4,WORD_BIT           ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+       addps   xmm3,xmm2       ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
+       addps   xmm7,xmm2       ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
+       addps   xmm1,xmm2       ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
+       addps   xmm5,xmm2       ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
+
+       pand    xmm3,xmm4               ; xmm3=(04 -- 14 -- 24 -- 34 --)
+       pslld   xmm7,WORD_BIT           ; xmm7=(-- 05 -- 15 -- 25 -- 35)
+       pand    xmm1,xmm4               ; xmm1=(02 -- 12 -- 22 -- 32 --)
+       pslld   xmm5,WORD_BIT           ; xmm5=(-- 03 -- 13 -- 23 -- 33)
+       por     xmm3,xmm7               ; xmm3=(04 05 14 15 24 25 34 35)
+       por     xmm1,xmm5               ; xmm1=(02 03 12 13 22 23 32 33)
+
+       movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm2=[PB_CENTERJSAMP]
+
+       packsswb  xmm6,xmm3     ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
+       packsswb  xmm1,xmm0     ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
+       paddb     xmm6,xmm2
+       paddb     xmm1,xmm2
+
+       movdqa    xmm4,xmm6     ; transpose coefficients(phase 2)
+       punpcklwd xmm6,xmm1     ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+       punpckhwd xmm4,xmm1     ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+
+       movdqa    xmm7,xmm6     ; transpose coefficients(phase 3)
+       punpckldq xmm6,xmm4     ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+       punpckhdq xmm7,xmm4     ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+
+       pshufd  xmm5,xmm6,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+       pshufd  xmm3,xmm7,0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+
+       pushpic ebx                     ; save GOT address
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+       movq    _MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7
+       mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
+       movq    _MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
+
+       poppic  ebx                     ; restore GOT address
+
+       add     esi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+       add     edi, byte 4*SIZEOF_JSAMPROW
+       dec     ecx                             ; ctr
+       jnz     near .rowloop
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JIDCT_FLT_SSE_SSE2_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jiss2fst.asm b/jiss2fst.asm
new file mode 100644 (file)
index 0000000..937a260
--- /dev/null
@@ -0,0 +1,512 @@
+;
+; jiss2fst.asm - fast integer IDCT (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the inverse DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jidctfst.c; see jidctfst.c
+; for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+%ifdef JIDCT_INT_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; Fixed-point parameters for this module.
+;   CONST_BITS : number of fraction bits in the F_x_xxx multipliers below,
+;                i.e. FIX(x) = round(x * 2^CONST_BITS).
+;   PASS1_BITS : extra scale bits; the pass-2 results are shifted right by
+;                (PASS1_BITS+3) before being packed to samples.
+%define CONST_BITS     8       ; 14 is also OK.
+%define PASS1_BITS     2
+
+; IFAST_SCALE_BITS is not defined in this file (presumably it comes from
+; jdct.inc); the assembly is aborted if it disagrees with PASS1_BITS.
+%if IFAST_SCALE_BITS != PASS1_BITS
+%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
+%endif
+
+%if CONST_BITS == 8
+; Pre-computed values for the default CONST_BITS (x * 256, rounded).
+F_1_082        equ     277             ; FIX(1.082392200)
+F_1_414        equ     362             ; FIX(1.414213562)
+F_1_847        equ     473             ; FIX(1.847759065)
+F_2_613        equ     669             ; FIX(2.613125930)
+F_1_613        equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; The multipliers are therefore written as integers with 30 fraction bits
+; and reduced to CONST_BITS fraction bits by DESCALE, which rounds to
+; nearest by adding half of the discarded range before shifting.
+%define        DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_1_082        equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414        equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613        equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+F_1_613        equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+; Constant table used by jpeg_idct_ifast_sse2.
+;
+; The multiplications are done with pmulhw, which keeps only the high
+; 16 bits of each signed 16x16-bit product.  The data operand is first
+; shifted left by PRE_MULTIPLY_SCALE_BITS (psllw) and the multiplier is
+; stored shifted left by CONST_SHIFT, so that the three shifts add up
+; to 16 and the result equals (x * F) >> CONST_BITS.
+;
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+
+%define PRE_MULTIPLY_SCALE_BITS   2
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
+
+       alignz  16
+       global  EXTN(jconst_idct_ifast_sse2)
+
+EXTN(jconst_idct_ifast_sse2):
+
+; Each PW_* entry is one multiplier replicated into 8 words (one XMM
+; register).  PW_MF1613 holds the negated F_1_613; see the "To avoid
+; overflow" notes in the code for why 1.613 is used instead of 2.613.
+; PB_CENTERJSAMP is CENTERJSAMPLE replicated into 16 bytes; it is added
+; (paddb) to the packed signed results in the final output stage.
+PW_F1414       times 8 dw  F_1_414 << CONST_SHIFT
+PW_F1847       times 8 dw  F_1_847 << CONST_SHIFT
+PW_MF1613      times 8 dw -F_1_613 << CONST_SHIFT
+PW_F1082       times 8 dw  F_1_082 << CONST_SHIFT
+PB_CENTERJSAMP times 16 db CENTERJSAMPLE
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_ifast_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                       JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+; Stack-argument offsets, relative to a base register (b) that points at
+; the saved ebp (so (b)+4 is the return address and (b)+8 the first
+; argument).
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+; After the prologue, ebp points to an aligned slot that holds the
+; original (unaligned) frame pointer; the WK_NUM xmmword work slots
+; wk(0)..wk(WK_NUM-1) lie immediately below it.
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         2
+
+       align   16
+       global  EXTN(jpeg_idct_ifast_sse2)
+
+EXTN(jpeg_idct_ifast_sse2):
+       ; -- Prologue: build an aligned frame so that wk() can be accessed
+       ;    with movdqa, while remembering the original stack position.
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax                       ; save it at [original_ebp]
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve the wk[] slots
+       pushpic ebx
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input.
+
+       ; eax still holds the original frame pointer set up above, so the
+       ; reload below is not needed and the arguments are read through eax.
+;      mov     eax, [original_ebp]
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
+       ; -- Shortcut for blocks whose AC terms (coefficient rows 1..7) are
+       ;    all zero.  In that case pass 1 reduces to dequantizing row 0
+       ;    and replicating each value down its column, which is done here
+       ;    without running the full column transform.
+       ;    Define NO_ZERO_COLUMN_TEST_IFAST_SSE2 to omit this test.
+       ;    Note: eax (original frame pointer) is clobbered from here on.
+
+       ; Quick reject: look at the first dword of rows 1 and 2 only.
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     near .columnDCT
+
+       ; Full test: OR rows 1..7 together ...
+       movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       por     xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       por     xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       por     xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       por     xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     xmm1,xmm0
+       ; ... then squeeze the 8 words into the low dword.  Signed
+       ; saturation never turns a non-zero value into zero, so eax is
+       ; zero only if all 8 words were zero.
+       packsswb xmm1,xmm1
+       packsswb xmm1,xmm1
+       movd    eax,xmm1
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       ; Dequantize row 0.  The quantization table is addressed with
+       ; SIZEOF_IFAST_MULT_TYPE, like every other table access in this
+       ; routine.
+       movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+       movdqa    xmm7,xmm0             ; xmm0=in0=(00 01 02 03 04 05 06 07)
+       punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+       punpckhwd xmm7,xmm7             ; xmm7=(04 04 05 05 06 06 07 07)
+
+       ; Broadcast each value to a whole register, using the same register
+       ; assignment that .columnDCT leaves behind at .column_end.
+       pshufd  xmm6,xmm0,0x00          ; xmm6=col0=(00 00 00 00 00 00 00 00)
+       pshufd  xmm2,xmm0,0x55          ; xmm2=col1=(01 01 01 01 01 01 01 01)
+       pshufd  xmm5,xmm0,0xAA          ; xmm5=col2=(02 02 02 02 02 02 02 02)
+       pshufd  xmm0,xmm0,0xFF          ; xmm0=col3=(03 03 03 03 03 03 03 03)
+       pshufd  xmm1,xmm7,0x00          ; xmm1=col4=(04 04 04 04 04 04 04 04)
+       pshufd  xmm4,xmm7,0x55          ; xmm4=col5=(05 05 05 05 05 05 05 05)
+       pshufd  xmm3,xmm7,0xAA          ; xmm3=col6=(06 06 06 06 06 06 06 06)
+       pshufd  xmm7,xmm7,0xFF          ; xmm7=col7=(07 07 07 07 07 07 07 07)
+
+       movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=col1
+       movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=col3
+       jmp     near .column_end
+       alignx  16,7
+%endif
+.columnDCT:
+       ; Pass 1 proper: dequantize all eight coefficient rows (esi) with
+       ; the table at edx, run the 1-D IDCT on eight columns at once (one
+       ; 16-bit lane per column), and transpose the result.
+       ;
+       ; Fixed-point multiply idiom used throughout:
+       ;   psllw  x, PRE_MULTIPLY_SCALE_BITS
+       ;   pmulhw x, [PW_Fxxxx]         ; constant stored as F << CONST_SHIFT
+       ; gives (x * F) >> CONST_BITS, because the three shift counts sum
+       ; to 16 and pmulhw keeps the high 16 bits of each signed product.
+       ;
+       ; On exit (.column_end):
+       ;   xmm6=col0  wk(0)=col1  xmm5=col2  wk(1)=col3
+       ;   xmm1=col4  xmm4=col5   xmm3=col6  xmm7=col7
+
+       ; -- Even part
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+       movdqa  xmm4,xmm0
+       movdqa  xmm5,xmm1
+       psubw   xmm0,xmm2               ; xmm0=tmp11
+       psubw   xmm1,xmm3
+       paddw   xmm4,xmm2               ; xmm4=tmp10
+       paddw   xmm5,xmm3               ; xmm5=tmp13
+
+       psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm1,[GOTOFF(ebx,PW_F1414)]
+       psubw   xmm1,xmm5               ; xmm1=tmp12
+
+       movdqa  xmm6,xmm4
+       movdqa  xmm7,xmm0
+       psubw   xmm4,xmm5               ; xmm4=tmp3
+       psubw   xmm0,xmm1               ; xmm0=tmp2
+       paddw   xmm6,xmm5               ; xmm6=tmp0
+       paddw   xmm7,xmm1               ; xmm7=tmp1
+
+       ; Spill tmp2/tmp3: all eight registers are needed for the odd part.
+       movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=tmp3
+       movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=tmp2
+
+       ; -- Odd part
+
+       movdqa  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       pmullw  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       movdqa  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+       pmullw  xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+       movdqa  xmm4,xmm2
+       movdqa  xmm0,xmm5
+       psubw   xmm2,xmm1               ; xmm2=z12
+       psubw   xmm5,xmm3               ; xmm5=z10
+       paddw   xmm4,xmm1               ; xmm4=z11
+       paddw   xmm0,xmm3               ; xmm0=z13
+
+       ; Keep an unscaled copy of z10: it is subtracted separately below
+       ; as the "- z10" term of the split -2.613 multiplication.
+       movdqa  xmm1,xmm5               ; xmm1=z10(unscaled)
+       psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+       psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+
+       movdqa  xmm3,xmm4
+       psubw   xmm4,xmm0
+       paddw   xmm3,xmm0               ; xmm3=tmp7
+
+       psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm4,[GOTOFF(ebx,PW_F1414)]     ; xmm4=tmp11
+
+       ; To avoid overflow...
+       ;
+       ; (Original)
+       ; tmp12 = -2.613125930 * z10 + z5;
+       ;
+       ; (This implementation)
+       ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+       ;       = -1.613125930 * z10 - z10 + z5;
+
+       movdqa  xmm0,xmm5
+       paddw   xmm5,xmm2
+       pmulhw  xmm5,[GOTOFF(ebx,PW_F1847)]     ; xmm5=z5
+       pmulhw  xmm0,[GOTOFF(ebx,PW_MF1613)]
+       pmulhw  xmm2,[GOTOFF(ebx,PW_F1082)]
+       psubw   xmm0,xmm1
+       psubw   xmm2,xmm5               ; xmm2=tmp10
+       paddw   xmm0,xmm5               ; xmm0=tmp12
+
+       ; -- Final output stage
+
+       psubw   xmm0,xmm3               ; xmm0=tmp6
+       movdqa  xmm1,xmm6
+       movdqa  xmm5,xmm7
+       paddw   xmm6,xmm3               ; xmm6=data0=(00 01 02 03 04 05 06 07)
+       paddw   xmm7,xmm0               ; xmm7=data1=(10 11 12 13 14 15 16 17)
+       psubw   xmm1,xmm3               ; xmm1=data7=(70 71 72 73 74 75 76 77)
+       psubw   xmm5,xmm0               ; xmm5=data6=(60 61 62 63 64 65 66 67)
+       psubw   xmm4,xmm0               ; xmm4=tmp5
+
+       ; The 8x8 word transpose is interleaved with the remaining
+       ; arithmetic; wk(0)/wk(1) are reused as scratch for partially
+       ; transposed rows once tmp2/tmp3 have been reloaded.
+       movdqa    xmm3,xmm6             ; transpose coefficients(phase 1)
+       punpcklwd xmm6,xmm7             ; xmm6=(00 10 01 11 02 12 03 13)
+       punpckhwd xmm3,xmm7             ; xmm3=(04 14 05 15 06 16 07 17)
+       movdqa    xmm0,xmm5             ; transpose coefficients(phase 1)
+       punpcklwd xmm5,xmm1             ; xmm5=(60 70 61 71 62 72 63 73)
+       punpckhwd xmm0,xmm1             ; xmm0=(64 74 65 75 66 76 67 77)
+
+       movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+       movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
+
+       movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(60 70 61 71 62 72 63 73)
+       movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(64 74 65 75 66 76 67 77)
+
+       paddw   xmm2,xmm4               ; xmm2=tmp4
+       movdqa  xmm5,xmm7
+       movdqa  xmm0,xmm1
+       paddw   xmm7,xmm4               ; xmm7=data2=(20 21 22 23 24 25 26 27)
+       paddw   xmm1,xmm2               ; xmm1=data4=(40 41 42 43 44 45 46 47)
+       psubw   xmm5,xmm4               ; xmm5=data5=(50 51 52 53 54 55 56 57)
+       psubw   xmm0,xmm2               ; xmm0=data3=(30 31 32 33 34 35 36 37)
+
+       movdqa    xmm4,xmm7             ; transpose coefficients(phase 1)
+       punpcklwd xmm7,xmm0             ; xmm7=(20 30 21 31 22 32 23 33)
+       punpckhwd xmm4,xmm0             ; xmm4=(24 34 25 35 26 36 27 37)
+       movdqa    xmm2,xmm1             ; transpose coefficients(phase 1)
+       punpcklwd xmm1,xmm5             ; xmm1=(40 50 41 51 42 52 43 53)
+       punpckhwd xmm2,xmm5             ; xmm2=(44 54 45 55 46 56 47 57)
+
+       movdqa    xmm0,xmm3             ; transpose coefficients(phase 2)
+       punpckldq xmm3,xmm4             ; xmm3=(04 14 24 34 05 15 25 35)
+       punpckhdq xmm0,xmm4             ; xmm0=(06 16 26 36 07 17 27 37)
+       movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+       punpckldq xmm6,xmm7             ; xmm6=(00 10 20 30 01 11 21 31)
+       punpckhdq xmm5,xmm7             ; xmm5=(02 12 22 32 03 13 23 33)
+
+       movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(60 70 61 71 62 72 63 73)
+       movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=(64 74 65 75 66 76 67 77)
+
+       movdqa  XMMWORD [wk(0)], xmm3   ; wk(0)=(04 14 24 34 05 15 25 35)
+       movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(06 16 26 36 07 17 27 37)
+
+       movdqa    xmm3,xmm1             ; transpose coefficients(phase 2)
+       punpckldq xmm1,xmm4             ; xmm1=(40 50 60 70 41 51 61 71)
+       punpckhdq xmm3,xmm4             ; xmm3=(42 52 62 72 43 53 63 73)
+       movdqa    xmm0,xmm2             ; transpose coefficients(phase 2)
+       punpckldq xmm2,xmm7             ; xmm2=(44 54 64 74 45 55 65 75)
+       punpckhdq xmm0,xmm7             ; xmm0=(46 56 66 76 47 57 67 77)
+
+       movdqa     xmm4,xmm6            ; transpose coefficients(phase 3)
+       punpcklqdq xmm6,xmm1            ; xmm6=col0=(00 10 20 30 40 50 60 70)
+       punpckhqdq xmm4,xmm1            ; xmm4=col1=(01 11 21 31 41 51 61 71)
+       movdqa     xmm7,xmm5            ; transpose coefficients(phase 3)
+       punpcklqdq xmm5,xmm3            ; xmm5=col2=(02 12 22 32 42 52 62 72)
+       punpckhqdq xmm7,xmm3            ; xmm7=col3=(03 13 23 33 43 53 63 73)
+
+       movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(04 14 24 34 05 15 25 35)
+       movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(06 16 26 36 07 17 27 37)
+
+       ; col1/col3 are parked in wk[] for pass 2, which reloads them
+       ; when it reaches its odd part.
+       movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=col1
+       movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=col3
+
+       movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+       punpcklqdq xmm1,xmm2            ; xmm1=col4=(04 14 24 34 44 54 64 74)
+       punpckhqdq xmm4,xmm2            ; xmm4=col5=(05 15 25 35 45 55 65 75)
+       movdqa     xmm7,xmm3            ; transpose coefficients(phase 3)
+       punpcklqdq xmm3,xmm0            ; xmm3=col6=(06 16 26 36 46 56 66 76)
+       punpckhqdq xmm7,xmm0            ; xmm7=col7=(07 17 27 37 47 57 67 77)
+.column_end:
+       ; Entry state (from either the zero-AC shortcut or .columnDCT):
+       ;   xmm6=col0  wk(0)=col1  xmm5=col2  wk(1)=col3
+       ;   xmm1=col4  xmm4=col5   xmm3=col6  xmm7=col7
+
+       ; -- Prefetch the next coefficient block
+
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       ; eax may have been overwritten by the zero-column test, so the
+       ; original frame pointer is reloaded before reading the arguments.
+       mov     eax, [original_ebp]
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]
+
+       ; -- Even part
+
+       ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
+
+       movdqa  xmm2,xmm6
+       movdqa  xmm0,xmm5
+       psubw   xmm6,xmm1               ; xmm6=tmp11
+       psubw   xmm5,xmm3
+       paddw   xmm2,xmm1               ; xmm2=tmp10
+       paddw   xmm0,xmm3               ; xmm0=tmp13
+
+       psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm5,[GOTOFF(ebx,PW_F1414)]
+       psubw   xmm5,xmm0               ; xmm5=tmp12
+
+       movdqa  xmm1,xmm2
+       movdqa  xmm3,xmm6
+       psubw   xmm2,xmm0               ; xmm2=tmp3
+       psubw   xmm6,xmm5               ; xmm6=tmp2
+       paddw   xmm1,xmm0               ; xmm1=tmp0
+       paddw   xmm3,xmm5               ; xmm3=tmp1
+
+       movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=col1
+       movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=col3
+
+       ; Note the slot assignment here (wk(0)=tmp3, wk(1)=tmp2) is the
+       ; reverse of the one used in pass 1.
+       movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp3
+       movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp2
+
+       ; -- Odd part
+
+       ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
+
+       movdqa  xmm2,xmm0
+       movdqa  xmm6,xmm4
+       psubw   xmm0,xmm7               ; xmm0=z12
+       psubw   xmm4,xmm5               ; xmm4=z10
+       paddw   xmm2,xmm7               ; xmm2=z11
+       paddw   xmm6,xmm5               ; xmm6=z13
+
+       movdqa  xmm7,xmm4               ; xmm7=z10(unscaled)
+       psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+       psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+
+       movdqa  xmm5,xmm2
+       psubw   xmm2,xmm6
+       paddw   xmm5,xmm6               ; xmm5=tmp7
+
+       psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+       pmulhw  xmm2,[GOTOFF(ebx,PW_F1414)]     ; xmm2=tmp11
+
+       ; To avoid overflow...
+       ;
+       ; (Original)
+       ; tmp12 = -2.613125930 * z10 + z5;
+       ;
+       ; (This implementation)
+       ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+       ;       = -1.613125930 * z10 - z10 + z5;
+
+       movdqa  xmm6,xmm4
+       paddw   xmm4,xmm0
+       pmulhw  xmm4,[GOTOFF(ebx,PW_F1847)]     ; xmm4=z5
+       pmulhw  xmm6,[GOTOFF(ebx,PW_MF1613)]
+       pmulhw  xmm0,[GOTOFF(ebx,PW_F1082)]
+       psubw   xmm6,xmm7
+       psubw   xmm0,xmm4               ; xmm0=tmp10
+       paddw   xmm6,xmm4               ; xmm6=tmp12
+
+       ; -- Final output stage
+
+       ; Each result is shifted right arithmetically by (PASS1_BITS+3),
+       ; packed to bytes with signed saturation (packsswb), and then
+       ; offset by CENTERJSAMPLE with a wrapping byte add (paddb).
+
+       psubw   xmm6,xmm5               ; xmm6=tmp6
+       movdqa  xmm7,xmm1
+       movdqa  xmm4,xmm3
+       paddw   xmm1,xmm5               ; xmm1=data0=(00 10 20 30 40 50 60 70)
+       paddw   xmm3,xmm6               ; xmm3=data1=(01 11 21 31 41 51 61 71)
+       psraw   xmm1,(PASS1_BITS+3)     ; descale
+       psraw   xmm3,(PASS1_BITS+3)     ; descale
+       psubw   xmm7,xmm5               ; xmm7=data7=(07 17 27 37 47 57 67 77)
+       psubw   xmm4,xmm6               ; xmm4=data6=(06 16 26 36 46 56 66 76)
+       psraw   xmm7,(PASS1_BITS+3)     ; descale
+       psraw   xmm4,(PASS1_BITS+3)     ; descale
+       psubw   xmm2,xmm6               ; xmm2=tmp5
+
+       packsswb  xmm1,xmm4     ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+       packsswb  xmm3,xmm7     ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+       movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp2
+       movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp3
+
+       paddw   xmm0,xmm2               ; xmm0=tmp4
+       movdqa  xmm4,xmm5
+       movdqa  xmm7,xmm6
+       paddw   xmm5,xmm2               ; xmm5=data2=(02 12 22 32 42 52 62 72)
+       paddw   xmm6,xmm0               ; xmm6=data4=(04 14 24 34 44 54 64 74)
+       psraw   xmm5,(PASS1_BITS+3)     ; descale
+       psraw   xmm6,(PASS1_BITS+3)     ; descale
+       psubw   xmm4,xmm2               ; xmm4=data5=(05 15 25 35 45 55 65 75)
+       psubw   xmm7,xmm0               ; xmm7=data3=(03 13 23 33 43 53 63 73)
+       psraw   xmm4,(PASS1_BITS+3)     ; descale
+       psraw   xmm7,(PASS1_BITS+3)     ; descale
+
+       movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm2=[PB_CENTERJSAMP]
+
+       packsswb  xmm5,xmm6     ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+       packsswb  xmm7,xmm4     ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+
+       paddb     xmm1,xmm2
+       paddb     xmm3,xmm2
+       paddb     xmm5,xmm2
+       paddb     xmm7,xmm2
+
+       ; Byte-level transpose so that each 64-bit half of a register
+       ; holds one complete output row of 8 samples.
+       movdqa    xmm0,xmm1     ; transpose coefficients(phase 1)
+       punpcklbw xmm1,xmm3     ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+       punpckhbw xmm0,xmm3     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+       movdqa    xmm6,xmm5     ; transpose coefficients(phase 1)
+       punpcklbw xmm5,xmm7     ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+       punpckhbw xmm6,xmm7     ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+
+       movdqa    xmm4,xmm1     ; transpose coefficients(phase 2)
+       punpcklwd xmm1,xmm5     ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+       punpckhwd xmm4,xmm5     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+       movdqa    xmm2,xmm6     ; transpose coefficients(phase 2)
+       punpcklwd xmm6,xmm0     ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+       punpckhwd xmm2,xmm0     ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+
+       movdqa    xmm3,xmm1     ; transpose coefficients(phase 3)
+       punpckldq xmm1,xmm6     ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+       punpckhdq xmm3,xmm6     ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+       movdqa    xmm7,xmm4     ; transpose coefficients(phase 3)
+       punpckldq xmm4,xmm2     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+       punpckhdq xmm7,xmm2     ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+
+       ; movq stores only the low 64 bits, so the odd rows (held in the
+       ; high halves) are brought down by swapping halves (pshufd 0x4E).
+       pshufd  xmm5,xmm1,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+       pshufd  xmm0,xmm3,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+       pshufd  xmm6,xmm4,0x4E  ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+       pshufd  xmm2,xmm7,0x4E  ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+
+       ; Store the eight rows: row pointer from output_buf[i] (edi),
+       ; column offset output_col (eax).  edx/esi are scratch here; the
+       ; input pointer in esi is no longer needed.
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+       mov     edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7
+
+       mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
+       mov     edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
+
+       ; -- Epilogue: restore registers in reverse order of the prologue,
+       ;    then undo the stack alignment via the pointer saved at [ebp].
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JIDCT_INT_SSE2_SUPPORTED
+%endif ; DCT_IFAST_SUPPORTED
diff --git a/jiss2int.asm b/jiss2int.asm
new file mode 100644 (file)
index 0000000..b0e7109
--- /dev/null
@@ -0,0 +1,869 @@
+;
+; jiss2int.asm - accurate integer IDCT (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler), and
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; inverse DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jidctint.c; see jidctint.c for
+; more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_ISLOW_SUPPORTED
+%ifdef JIDCT_INT_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     13              ; fixed-point scale of the multipliers: FIX(x) = x * 2^CONST_BITS, rounded
+%define PASS1_BITS     2               ; extra fraction bits kept in the pass-1 results and removed in pass 2
+
+%define DESCALE_P1     (CONST_BITS-PASS1_BITS)         ; psrad count that descales the pass-1 (column) results
+%define DESCALE_P2     (CONST_BITS+PASS1_BITS+3)       ; psrad count that descales the pass-2 (row) results
+; F_a_bbb = FIX(a.bbb...): DCT multipliers as integers; literal values are used when CONST_BITS is 13.
+%if CONST_BITS == 13
+F_0_298        equ      2446           ; FIX(0.298631336)
+F_0_390        equ      3196           ; FIX(0.390180644)
+F_0_541        equ      4433           ; FIX(0.541196100)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_175        equ      9633           ; FIX(1.175875602)
+F_1_501        equ     12299           ; FIX(1.501321110)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_1_961        equ     16069           ; FIX(1.961570560)
+F_2_053        equ     16819           ; FIX(2.053119869)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_072        equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; x / 2^n rounded to nearest; the inputs below are the multipliers scaled by 2^30
+F_0_298        equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390        equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541        equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175        equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501        equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961        equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053        equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072        equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+; Constant table of jpeg_idct_islow_sse2; the code addresses each entry as [GOTOFF(ebx,<label>)].
+       alignz  16
+       global  EXTN(jconst_idct_islow_sse2)
+
+EXTN(jconst_idct_islow_sse2):
+; PW_x_y: word pair (x, y) repeated 4 times, so pmaddwd on interleaved (p, q) words gives p*x + q*y per dword.
+PW_F130_F054   times 4 dw  (F_0_541+F_0_765), F_0_541  ; (~ 1.307, ~ 0.541): even part, tmp3 from (z2, z3)
+PW_F054_MF130  times 4 dw  F_0_541, (F_0_541-F_1_847)  ; (~ 0.541, ~-1.307): even part, tmp2 from (z2, z3)
+PW_MF078_F117  times 4 dw  (F_1_175-F_1_961), F_1_175  ; (~-0.786, ~ 1.176): odd part, z3 from (z3, z4)
+PW_F117_F078   times 4 dw  F_1_175, (F_1_175-F_0_390)  ; (~ 1.176, ~ 0.786): odd part, z4 from (z3, z4)
+PW_MF060_MF089 times 4 dw  (F_0_298-F_0_899),-F_0_899  ; (~-0.601, ~-0.900): odd part, tmp0 from (tmp0, tmp3)
+PW_MF089_F060  times 4 dw -F_0_899, (F_1_501-F_0_899)  ; (~-0.900, ~ 0.601): odd part, tmp3 from (tmp0, tmp3)
+PW_MF050_MF256 times 4 dw  (F_2_053-F_2_562),-F_2_562  ; (~-0.510, ~-2.563): odd part, tmp1 from (tmp1, tmp2)
+PW_MF256_F050  times 4 dw -F_2_562, (F_3_072-F_2_562)  ; (~-2.563, ~ 0.510): odd part, tmp2 from (tmp1, tmp2)
+PD_DESCALE_P1  times 4 dd  1 << (DESCALE_P1-1)         ; rounding term added before "psrad reg,DESCALE_P1" (pass 1)
+PD_DESCALE_P2  times 4 dd  1 << (DESCALE_P2-1)         ; rounding term added before "psrad reg,DESCALE_P2" (pass 2)
+PB_CENTERJSAMP times 16 db CENTERJSAMPLE               ; added with paddb to the packed output bytes in pass 2
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_islow_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                       JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo  (b = address where the entry "push ebp" stored ebp)
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+; Local frame: ebp is aligned to SIZEOF_XMMWORD by the prologue; the wk[] scratch slots lie directly below it.
+%define original_ebp   ebp+0           ; slot holding the esp value taken right after "push ebp" (the b used above)
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]; wk(0) is the lowest address
+%define WK_NUM         12              ; number of XMMWORD scratch slots, wk(0)..wk(WK_NUM-1)
+
+       align   16
+       global  EXTN(jpeg_idct_islow_sse2)
+
+EXTN(jpeg_idct_islow_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]
+       pushpic ebx
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input.
+
+;      mov     eax, [original_ebp]
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     near .columnDCT
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       por     xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       por     xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       por     xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       por     xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     xmm1,xmm0
+       packsswb xmm1,xmm1
+       packsswb xmm1,xmm1
+       movd    eax,xmm1
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       movdqa  xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       psllw   xmm5,PASS1_BITS
+
+       movdqa    xmm4,xmm5             ; xmm5=in0=(00 01 02 03 04 05 06 07)
+       punpcklwd xmm5,xmm5             ; xmm5=(00 00 01 01 02 02 03 03)
+       punpckhwd xmm4,xmm4             ; xmm4=(04 04 05 05 06 06 07 07)
+
+       pshufd  xmm7,xmm5,0x00          ; xmm7=col0=(00 00 00 00 00 00 00 00)
+       pshufd  xmm6,xmm5,0x55          ; xmm6=col1=(01 01 01 01 01 01 01 01)
+       pshufd  xmm1,xmm5,0xAA          ; xmm1=col2=(02 02 02 02 02 02 02 02)
+       pshufd  xmm5,xmm5,0xFF          ; xmm5=col3=(03 03 03 03 03 03 03 03)
+       pshufd  xmm0,xmm4,0x00          ; xmm0=col4=(04 04 04 04 04 04 04 04)
+       pshufd  xmm3,xmm4,0x55          ; xmm3=col5=(05 05 05 05 05 05 05 05)
+       pshufd  xmm2,xmm4,0xAA          ; xmm2=col6=(06 06 06 06 06 06 06 06)
+       pshufd  xmm4,xmm4,0xFF          ; xmm4=col7=(07 07 07 07 07 07 07 07)
+
+       movdqa  XMMWORD [wk(8)], xmm6   ; wk(8)=col1
+       movdqa  XMMWORD [wk(9)], xmm5   ; wk(9)=col3
+       movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+       movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+       jmp     near .column_end
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Even part
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; (Original)
+       ; z1 = (z2 + z3) * 0.541196100;
+       ; tmp2 = z1 + z3 * -1.847759065;
+       ; tmp3 = z1 + z2 * 0.765366865;
+       ;
+       ; (This implementation)
+       ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+       ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+       movdqa    xmm4,xmm1             ; xmm1=in2=z2
+       movdqa    xmm5,xmm1
+       punpcklwd xmm4,xmm3             ; xmm3=in6=z3
+       punpckhwd xmm5,xmm3
+       movdqa    xmm1,xmm4
+       movdqa    xmm3,xmm5
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]       ; xmm4=tmp3L
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]       ; xmm5=tmp3H
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=tmp2L
+       pmaddwd   xmm3,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm3=tmp2H
+
+       movdqa    xmm6,xmm0
+       paddw     xmm0,xmm2             ; xmm0=in0+in4
+       psubw     xmm6,xmm2             ; xmm6=in0-in4
+
+       pxor      xmm7,xmm7
+       pxor      xmm2,xmm2
+       punpcklwd xmm7,xmm0             ; xmm7=tmp0L
+       punpckhwd xmm2,xmm0             ; xmm2=tmp0H
+       psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+       psrad     xmm2,(16-CONST_BITS)  ; psrad xmm2,16 & pslld xmm2,CONST_BITS
+
+       movdqa  xmm0,xmm7
+       paddd   xmm7,xmm4               ; xmm7=tmp10L
+       psubd   xmm0,xmm4               ; xmm0=tmp13L
+       movdqa  xmm4,xmm2
+       paddd   xmm2,xmm5               ; xmm2=tmp10H
+       psubd   xmm4,xmm5               ; xmm4=tmp13H
+
+       movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp10L
+       movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=tmp10H
+       movdqa  XMMWORD [wk(2)], xmm0   ; wk(2)=tmp13L
+       movdqa  XMMWORD [wk(3)], xmm4   ; wk(3)=tmp13H
+
+       pxor      xmm5,xmm5
+       pxor      xmm7,xmm7
+       punpcklwd xmm5,xmm6             ; xmm5=tmp1L
+       punpckhwd xmm7,xmm6             ; xmm7=tmp1H
+       psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+       psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+
+       movdqa  xmm2,xmm5
+       paddd   xmm5,xmm1               ; xmm5=tmp11L
+       psubd   xmm2,xmm1               ; xmm2=tmp12L
+       movdqa  xmm0,xmm7
+       paddd   xmm7,xmm3               ; xmm7=tmp11H
+       psubd   xmm0,xmm3               ; xmm0=tmp12H
+
+       movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+       movdqa  XMMWORD [wk(5)], xmm7   ; wk(5)=tmp11H
+       movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=tmp12L
+       movdqa  XMMWORD [wk(7)], xmm0   ; wk(7)=tmp12H
+
+       ; -- Odd part
+
+       movdqa  xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       movdqa  xmm5,xmm6
+       movdqa  xmm7,xmm4
+       paddw   xmm5,xmm3               ; xmm5=z3
+       paddw   xmm7,xmm1               ; xmm7=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movdqa    xmm2,xmm5
+       movdqa    xmm0,xmm5
+       punpcklwd xmm2,xmm7
+       punpckhwd xmm0,xmm7
+       movdqa    xmm5,xmm2
+       movdqa    xmm7,xmm0
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm2=z3L
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm0=z3H
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]       ; xmm5=z4L
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_F117_F078)]       ; xmm7=z4H
+
+       movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=z3L
+       movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=z3H
+
+       ; (Original)
+       ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+       ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+       ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+       ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+       ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+       ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+       ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+       ; tmp0 += z3;  tmp1 += z4;
+       ; tmp2 += z3;  tmp3 += z4;
+
+       movdqa    xmm2,xmm3
+       movdqa    xmm0,xmm3
+       punpcklwd xmm2,xmm4
+       punpckhwd xmm0,xmm4
+       movdqa    xmm3,xmm2
+       movdqa    xmm4,xmm0
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm2=tmp0L
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm0=tmp0H
+       pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm3=tmp3L
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm4=tmp3H
+
+       paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp0L
+       paddd   xmm0, XMMWORD [wk(11)]  ; xmm0=tmp0H
+       paddd   xmm3,xmm5               ; xmm3=tmp3L
+       paddd   xmm4,xmm7               ; xmm4=tmp3H
+
+       movdqa  XMMWORD [wk(8)], xmm2   ; wk(8)=tmp0L
+       movdqa  XMMWORD [wk(9)], xmm0   ; wk(9)=tmp0H
+
+       movdqa    xmm2,xmm1
+       movdqa    xmm0,xmm1
+       punpcklwd xmm2,xmm6
+       punpckhwd xmm0,xmm6
+       movdqa    xmm1,xmm2
+       movdqa    xmm6,xmm0
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm2=tmp1L
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm0=tmp1H
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm1=tmp2L
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm6=tmp2H
+
+       paddd   xmm2,xmm5               ; xmm2=tmp1L
+       paddd   xmm0,xmm7               ; xmm0=tmp1H
+       paddd   xmm1, XMMWORD [wk(10)]  ; xmm1=tmp2L
+       paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
+
+       movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=tmp1L
+       movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=tmp1H
+
+       ; -- Final output stage
+
+       movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+       movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=tmp10H
+
+       movdqa  xmm2,xmm5
+       movdqa  xmm0,xmm7
+       paddd   xmm5,xmm3               ; xmm5=data0L
+       paddd   xmm7,xmm4               ; xmm7=data0H
+       psubd   xmm2,xmm3               ; xmm2=data7L
+       psubd   xmm0,xmm4               ; xmm0=data7H
+
+       movdqa  xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm3=[PD_DESCALE_P1]
+
+       paddd   xmm5,xmm3
+       paddd   xmm7,xmm3
+       psrad   xmm5,DESCALE_P1
+       psrad   xmm7,DESCALE_P1
+       paddd   xmm2,xmm3
+       paddd   xmm0,xmm3
+       psrad   xmm2,DESCALE_P1
+       psrad   xmm0,DESCALE_P1
+
+       packssdw  xmm5,xmm7             ; xmm5=data0=(00 01 02 03 04 05 06 07)
+       packssdw  xmm2,xmm0             ; xmm2=data7=(70 71 72 73 74 75 76 77)
+
+       movdqa  xmm4, XMMWORD [wk(4)]   ; xmm4=tmp11L
+       movdqa  xmm3, XMMWORD [wk(5)]   ; xmm3=tmp11H
+
+       movdqa  xmm7,xmm4
+       movdqa  xmm0,xmm3
+       paddd   xmm4,xmm1               ; xmm4=data1L
+       paddd   xmm3,xmm6               ; xmm3=data1H
+       psubd   xmm7,xmm1               ; xmm7=data6L
+       psubd   xmm0,xmm6               ; xmm0=data6H
+
+       movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm1=[PD_DESCALE_P1]
+
+       paddd   xmm4,xmm1
+       paddd   xmm3,xmm1
+       psrad   xmm4,DESCALE_P1
+       psrad   xmm3,DESCALE_P1
+       paddd   xmm7,xmm1
+       paddd   xmm0,xmm1
+       psrad   xmm7,DESCALE_P1
+       psrad   xmm0,DESCALE_P1
+
+       packssdw  xmm4,xmm3             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+       packssdw  xmm7,xmm0             ; xmm7=data6=(60 61 62 63 64 65 66 67)
+
+       movdqa    xmm6,xmm5             ; transpose coefficients(phase 1)
+       punpcklwd xmm5,xmm4             ; xmm5=(00 10 01 11 02 12 03 13)
+       punpckhwd xmm6,xmm4             ; xmm6=(04 14 05 15 06 16 07 17)
+       movdqa    xmm1,xmm7             ; transpose coefficients(phase 1)
+       punpcklwd xmm7,xmm2             ; xmm7=(60 70 61 71 62 72 63 73)
+       punpckhwd xmm1,xmm2             ; xmm1=(64 74 65 75 66 76 67 77)
+
+       movdqa  xmm3, XMMWORD [wk(6)]   ; xmm3=tmp12L
+       movdqa  xmm0, XMMWORD [wk(7)]   ; xmm0=tmp12H
+       movdqa  xmm4, XMMWORD [wk(10)]  ; xmm4=tmp1L
+       movdqa  xmm2, XMMWORD [wk(11)]  ; xmm2=tmp1H
+
+       movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 01 11 02 12 03 13)
+       movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=(04 14 05 15 06 16 07 17)
+       movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=(60 70 61 71 62 72 63 73)
+       movdqa  XMMWORD [wk(5)], xmm1   ; wk(5)=(64 74 65 75 66 76 67 77)
+
+       movdqa  xmm5,xmm3
+       movdqa  xmm6,xmm0
+       paddd   xmm3,xmm4               ; xmm3=data2L
+       paddd   xmm0,xmm2               ; xmm0=data2H
+       psubd   xmm5,xmm4               ; xmm5=data5L
+       psubd   xmm6,xmm2               ; xmm6=data5H
+
+       movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm7=[PD_DESCALE_P1]
+
+       paddd   xmm3,xmm7
+       paddd   xmm0,xmm7
+       psrad   xmm3,DESCALE_P1
+       psrad   xmm0,DESCALE_P1
+       paddd   xmm5,xmm7
+       paddd   xmm6,xmm7
+       psrad   xmm5,DESCALE_P1
+       psrad   xmm6,DESCALE_P1
+
+       packssdw  xmm3,xmm0             ; xmm3=data2=(20 21 22 23 24 25 26 27)
+       packssdw  xmm5,xmm6             ; xmm5=data5=(50 51 52 53 54 55 56 57)
+
+       movdqa  xmm1, XMMWORD [wk(2)]   ; xmm1=tmp13L
+       movdqa  xmm4, XMMWORD [wk(3)]   ; xmm4=tmp13H
+       movdqa  xmm2, XMMWORD [wk(8)]   ; xmm2=tmp0L
+       movdqa  xmm7, XMMWORD [wk(9)]   ; xmm7=tmp0H
+
+       movdqa  xmm0,xmm1
+       movdqa  xmm6,xmm4
+       paddd   xmm1,xmm2               ; xmm1=data3L
+       paddd   xmm4,xmm7               ; xmm4=data3H
+       psubd   xmm0,xmm2               ; xmm0=data4L
+       psubd   xmm6,xmm7               ; xmm6=data4H
+
+       movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm2=[PD_DESCALE_P1]
+
+       paddd   xmm1,xmm2
+       paddd   xmm4,xmm2
+       psrad   xmm1,DESCALE_P1
+       psrad   xmm4,DESCALE_P1
+       paddd   xmm0,xmm2
+       paddd   xmm6,xmm2
+       psrad   xmm0,DESCALE_P1
+       psrad   xmm6,DESCALE_P1
+
+       packssdw  xmm1,xmm4             ; xmm1=data3=(30 31 32 33 34 35 36 37)
+       packssdw  xmm0,xmm6             ; xmm0=data4=(40 41 42 43 44 45 46 47)
+
+       movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(00 10 01 11 02 12 03 13)
+       movdqa  xmm2, XMMWORD [wk(1)]   ; xmm2=(04 14 05 15 06 16 07 17)
+
+       movdqa    xmm4,xmm3             ; transpose coefficients(phase 1)
+       punpcklwd xmm3,xmm1             ; xmm3=(20 30 21 31 22 32 23 33)
+       punpckhwd xmm4,xmm1             ; xmm4=(24 34 25 35 26 36 27 37)
+       movdqa    xmm6,xmm0             ; transpose coefficients(phase 1)
+       punpcklwd xmm0,xmm5             ; xmm0=(40 50 41 51 42 52 43 53)
+       punpckhwd xmm6,xmm5             ; xmm6=(44 54 45 55 46 56 47 57)
+
+       movdqa    xmm1,xmm7             ; transpose coefficients(phase 2)
+       punpckldq xmm7,xmm3             ; xmm7=(00 10 20 30 01 11 21 31)
+       punpckhdq xmm1,xmm3             ; xmm1=(02 12 22 32 03 13 23 33)
+       movdqa    xmm5,xmm2             ; transpose coefficients(phase 2)
+       punpckldq xmm2,xmm4             ; xmm2=(04 14 24 34 05 15 25 35)
+       punpckhdq xmm5,xmm4             ; xmm5=(06 16 26 36 07 17 27 37)
+
+       movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=(60 70 61 71 62 72 63 73)
+       movdqa  xmm4, XMMWORD [wk(5)]   ; xmm4=(64 74 65 75 66 76 67 77)
+
+       movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=(04 14 24 34 05 15 25 35)
+       movdqa  XMMWORD [wk(7)], xmm5   ; wk(7)=(06 16 26 36 07 17 27 37)
+
+       movdqa    xmm2,xmm0             ; transpose coefficients(phase 2)
+       punpckldq xmm0,xmm3             ; xmm0=(40 50 60 70 41 51 61 71)
+       punpckhdq xmm2,xmm3             ; xmm2=(42 52 62 72 43 53 63 73)
+       movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+       punpckldq xmm6,xmm4             ; xmm6=(44 54 64 74 45 55 65 75)
+       punpckhdq xmm5,xmm4             ; xmm5=(46 56 66 76 47 57 67 77)
+
+       movdqa     xmm3,xmm7            ; transpose coefficients(phase 3)
+       punpcklqdq xmm7,xmm0            ; xmm7=col0=(00 10 20 30 40 50 60 70)
+       punpckhqdq xmm3,xmm0            ; xmm3=col1=(01 11 21 31 41 51 61 71)
+       movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+       punpcklqdq xmm1,xmm2            ; xmm1=col2=(02 12 22 32 42 52 62 72)
+       punpckhqdq xmm4,xmm2            ; xmm4=col3=(03 13 23 33 43 53 63 73)
+
+       movdqa  xmm0, XMMWORD [wk(6)]   ; xmm0=(04 14 24 34 05 15 25 35)
+       movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=(06 16 26 36 07 17 27 37)
+
+       movdqa  XMMWORD [wk(8)], xmm3   ; wk(8)=col1
+       movdqa  XMMWORD [wk(9)], xmm4   ; wk(9)=col3
+
+       movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+       punpcklqdq xmm0,xmm6            ; xmm0=col4=(04 14 24 34 44 54 64 74)
+       punpckhqdq xmm3,xmm6            ; xmm3=col5=(05 15 25 35 45 55 65 75)
+       movdqa     xmm4,xmm2            ; transpose coefficients(phase 3)
+       punpcklqdq xmm2,xmm5            ; xmm2=col6=(06 16 26 36 46 56 66 76)
+       punpckhqdq xmm4,xmm5            ; xmm4=col7=(07 17 27 37 47 57 67 77)
+
+       movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+       movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+.column_end:
+
+       ; -- Prefetch the next coefficient block
+
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+
+       mov     eax, [original_ebp]
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]
+
+       ; -- Even part
+
+       ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
+
+       ; (Original)
+       ; z1 = (z2 + z3) * 0.541196100;
+       ; tmp2 = z1 + z3 * -1.847759065;
+       ; tmp3 = z1 + z2 * 0.765366865;
+       ;
+       ; (This implementation)
+       ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+       ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+       movdqa    xmm6,xmm1             ; xmm1=in2=z2
+       movdqa    xmm5,xmm1
+       punpcklwd xmm6,xmm2             ; xmm2=in6=z3
+       punpckhwd xmm5,xmm2
+       movdqa    xmm1,xmm6
+       movdqa    xmm2,xmm5
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]       ; xmm6=tmp3L
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]       ; xmm5=tmp3H
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=tmp2L
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm2=tmp2H
+
+       movdqa    xmm3,xmm7
+       paddw     xmm7,xmm0             ; xmm7=in0+in4
+       psubw     xmm3,xmm0             ; xmm3=in0-in4
+
+       pxor      xmm4,xmm4
+       pxor      xmm0,xmm0
+       punpcklwd xmm4,xmm7             ; xmm4=tmp0L
+       punpckhwd xmm0,xmm7             ; xmm0=tmp0H
+       psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+       psrad     xmm0,(16-CONST_BITS)  ; psrad xmm0,16 & pslld xmm0,CONST_BITS
+
+       movdqa  xmm7,xmm4
+       paddd   xmm4,xmm6               ; xmm4=tmp10L
+       psubd   xmm7,xmm6               ; xmm7=tmp13L
+       movdqa  xmm6,xmm0
+       paddd   xmm0,xmm5               ; xmm0=tmp10H
+       psubd   xmm6,xmm5               ; xmm6=tmp13H
+
+       movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=tmp10L
+       movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp10H
+       movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=tmp13L
+       movdqa  XMMWORD [wk(3)], xmm6   ; wk(3)=tmp13H
+
+       pxor      xmm5,xmm5
+       pxor      xmm4,xmm4
+       punpcklwd xmm5,xmm3             ; xmm5=tmp1L
+       punpckhwd xmm4,xmm3             ; xmm4=tmp1H
+       psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+       psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+
+       movdqa  xmm0,xmm5
+       paddd   xmm5,xmm1               ; xmm5=tmp11L
+       psubd   xmm0,xmm1               ; xmm0=tmp12L
+       movdqa  xmm7,xmm4
+       paddd   xmm4,xmm2               ; xmm4=tmp11H
+       psubd   xmm7,xmm2               ; xmm7=tmp12H
+
+       movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+       movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=tmp11H
+       movdqa  XMMWORD [wk(6)], xmm0   ; wk(6)=tmp12L
+       movdqa  XMMWORD [wk(7)], xmm7   ; wk(7)=tmp12H
+
+       ; -- Odd part
+
+       movdqa  xmm6, XMMWORD [wk(9)]   ; xmm6=col3
+       movdqa  xmm3, XMMWORD [wk(8)]   ; xmm3=col1
+       movdqa  xmm1, XMMWORD [wk(11)]  ; xmm1=col7
+       movdqa  xmm2, XMMWORD [wk(10)]  ; xmm2=col5
+
+       movdqa  xmm5,xmm6
+       movdqa  xmm4,xmm3
+       paddw   xmm5,xmm1               ; xmm5=z3
+       paddw   xmm4,xmm2               ; xmm4=z4
+
+       ; (Original)
+       ; z5 = (z3 + z4) * 1.175875602;
+       ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+       ; z3 += z5;  z4 += z5;
+       ;
+       ; (This implementation)
+       ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+       ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+       movdqa    xmm0,xmm5
+       movdqa    xmm7,xmm5
+       punpcklwd xmm0,xmm4
+       punpckhwd xmm7,xmm4
+       movdqa    xmm5,xmm0
+       movdqa    xmm4,xmm7
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm0=z3L
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm7=z3H
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]       ; xmm5=z4L
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_F117_F078)]       ; xmm4=z4H
+
+       movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=z3L
+       movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=z3H
+
+       ; (Original)
+       ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+       ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+       ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+       ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+       ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+       ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+       ;
+       ; (This implementation)
+       ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+       ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+       ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+       ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+       ; tmp0 += z3;  tmp1 += z4;
+       ; tmp2 += z3;  tmp3 += z4;
+
+       movdqa    xmm0,xmm1
+       movdqa    xmm7,xmm1
+       punpcklwd xmm0,xmm3
+       punpckhwd xmm7,xmm3
+       movdqa    xmm1,xmm0
+       movdqa    xmm3,xmm7
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm0=tmp0L
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm7=tmp0H
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm1=tmp3L
+       pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm3=tmp3H
+
+       paddd   xmm0, XMMWORD [wk(10)]  ; xmm0=tmp0L
+       paddd   xmm7, XMMWORD [wk(11)]  ; xmm7=tmp0H
+       paddd   xmm1,xmm5               ; xmm1=tmp3L
+       paddd   xmm3,xmm4               ; xmm3=tmp3H
+
+       movdqa  XMMWORD [wk(8)], xmm0   ; wk(8)=tmp0L
+       movdqa  XMMWORD [wk(9)], xmm7   ; wk(9)=tmp0H
+
+       movdqa    xmm0,xmm2
+       movdqa    xmm7,xmm2
+       punpcklwd xmm0,xmm6
+       punpckhwd xmm7,xmm6
+       movdqa    xmm2,xmm0
+       movdqa    xmm6,xmm7
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm0=tmp1L
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm7=tmp1H
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm2=tmp2L
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm6=tmp2H
+
+       paddd   xmm0,xmm5               ; xmm0=tmp1L
+       paddd   xmm7,xmm4               ; xmm7=tmp1H
+       paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp2L
+       paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
+
+       movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=tmp1L
+       movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=tmp1H
+
+       ; -- Final output stage
+
+       movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+       movdqa  xmm4, XMMWORD [wk(1)]   ; xmm4=tmp10H
+
+       movdqa  xmm0,xmm5
+       movdqa  xmm7,xmm4
+       paddd   xmm5,xmm1               ; xmm5=data0L
+       paddd   xmm4,xmm3               ; xmm4=data0H
+       psubd   xmm0,xmm1               ; xmm0=data7L
+       psubd   xmm7,xmm3               ; xmm7=data7H
+
+       movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm1=[PD_DESCALE_P2]
+
+       paddd   xmm5,xmm1
+       paddd   xmm4,xmm1
+       psrad   xmm5,DESCALE_P2
+       psrad   xmm4,DESCALE_P2
+       paddd   xmm0,xmm1
+       paddd   xmm7,xmm1
+       psrad   xmm0,DESCALE_P2
+       psrad   xmm7,DESCALE_P2
+
+       packssdw  xmm5,xmm4             ; xmm5=data0=(00 10 20 30 40 50 60 70)
+       packssdw  xmm0,xmm7             ; xmm0=data7=(07 17 27 37 47 57 67 77)
+
+       movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=tmp11L
+       movdqa  xmm1, XMMWORD [wk(5)]   ; xmm1=tmp11H
+
+       movdqa  xmm4,xmm3
+       movdqa  xmm7,xmm1
+       paddd   xmm3,xmm2               ; xmm3=data1L
+       paddd   xmm1,xmm6               ; xmm1=data1H
+       psubd   xmm4,xmm2               ; xmm4=data6L
+       psubd   xmm7,xmm6               ; xmm7=data6H
+
+       movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm2=[PD_DESCALE_P2]
+
+       paddd   xmm3,xmm2
+       paddd   xmm1,xmm2
+       psrad   xmm3,DESCALE_P2
+       psrad   xmm1,DESCALE_P2
+       paddd   xmm4,xmm2
+       paddd   xmm7,xmm2
+       psrad   xmm4,DESCALE_P2
+       psrad   xmm7,DESCALE_P2
+
+       packssdw  xmm3,xmm1             ; xmm3=data1=(01 11 21 31 41 51 61 71)
+       packssdw  xmm4,xmm7             ; xmm4=data6=(06 16 26 36 46 56 66 76)
+
+       packsswb  xmm5,xmm4             ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+       packsswb  xmm3,xmm0             ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+       movdqa  xmm6, XMMWORD [wk(6)]   ; xmm6=tmp12L
+       movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=tmp12H
+       movdqa  xmm1, XMMWORD [wk(10)]  ; xmm1=tmp1L
+       movdqa  xmm7, XMMWORD [wk(11)]  ; xmm7=tmp1H
+
+       movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+       movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+       movdqa  xmm4,xmm6
+       movdqa  xmm0,xmm2
+       paddd   xmm6,xmm1               ; xmm6=data2L
+       paddd   xmm2,xmm7               ; xmm2=data2H
+       psubd   xmm4,xmm1               ; xmm4=data5L
+       psubd   xmm0,xmm7               ; xmm0=data5H
+
+       movdqa  xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm5=[PD_DESCALE_P2]
+
+       paddd   xmm6,xmm5
+       paddd   xmm2,xmm5
+       psrad   xmm6,DESCALE_P2
+       psrad   xmm2,DESCALE_P2
+       paddd   xmm4,xmm5
+       paddd   xmm0,xmm5
+       psrad   xmm4,DESCALE_P2
+       psrad   xmm0,DESCALE_P2
+
+       packssdw  xmm6,xmm2             ; xmm6=data2=(02 12 22 32 42 52 62 72)
+       packssdw  xmm4,xmm0             ; xmm4=data5=(05 15 25 35 45 55 65 75)
+
+       movdqa  xmm3, XMMWORD [wk(2)]   ; xmm3=tmp13L
+       movdqa  xmm1, XMMWORD [wk(3)]   ; xmm1=tmp13H
+       movdqa  xmm7, XMMWORD [wk(8)]   ; xmm7=tmp0L
+       movdqa  xmm5, XMMWORD [wk(9)]   ; xmm5=tmp0H
+
+       movdqa  xmm2,xmm3
+       movdqa  xmm0,xmm1
+       paddd   xmm3,xmm7               ; xmm3=data3L
+       paddd   xmm1,xmm5               ; xmm1=data3H
+       psubd   xmm2,xmm7               ; xmm2=data4L
+       psubd   xmm0,xmm5               ; xmm0=data4H
+
+       movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm7=[PD_DESCALE_P2]
+
+       paddd   xmm3,xmm7
+       paddd   xmm1,xmm7
+       psrad   xmm3,DESCALE_P2
+       psrad   xmm1,DESCALE_P2
+       paddd   xmm2,xmm7
+       paddd   xmm0,xmm7
+       psrad   xmm2,DESCALE_P2
+       psrad   xmm0,DESCALE_P2
+
+       movdqa    xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm5=[PB_CENTERJSAMP]
+
+       packssdw  xmm3,xmm1             ; xmm3=data3=(03 13 23 33 43 53 63 73)
+       packssdw  xmm2,xmm0             ; xmm2=data4=(04 14 24 34 44 54 64 74)
+
+       movdqa    xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+       movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+       packsswb  xmm6,xmm2             ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+       packsswb  xmm3,xmm4             ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+
+       paddb     xmm7,xmm5
+       paddb     xmm1,xmm5
+       paddb     xmm6,xmm5
+       paddb     xmm3,xmm5
+
+       movdqa    xmm0,xmm7     ; transpose coefficients(phase 1)
+       punpcklbw xmm7,xmm1     ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+       punpckhbw xmm0,xmm1     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+       movdqa    xmm2,xmm6     ; transpose coefficients(phase 1)
+       punpcklbw xmm6,xmm3     ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+       punpckhbw xmm2,xmm3     ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+
+       movdqa    xmm4,xmm7     ; transpose coefficients(phase 2)
+       punpcklwd xmm7,xmm6     ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+       punpckhwd xmm4,xmm6     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+       movdqa    xmm5,xmm2     ; transpose coefficients(phase 2)
+       punpcklwd xmm2,xmm0     ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+       punpckhwd xmm5,xmm0     ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+
+       movdqa    xmm1,xmm7     ; transpose coefficients(phase 3)
+       punpckldq xmm7,xmm2     ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+       punpckhdq xmm1,xmm2     ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+       movdqa    xmm3,xmm4     ; transpose coefficients(phase 3)
+       punpckldq xmm4,xmm5     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+       punpckhdq xmm3,xmm5     ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+
+       pshufd  xmm6,xmm7,0x4E  ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+       pshufd  xmm0,xmm1,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+       pshufd  xmm2,xmm4,0x4E  ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+       pshufd  xmm5,xmm3,0x4E  ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1
+       mov     edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+
+       mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
+       mov     edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
+       movq    _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2
+       movq    _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JIDCT_INT_SSE2_SUPPORTED
+%endif ; DCT_ISLOW_SUPPORTED
diff --git a/jiss2red.asm b/jiss2red.asm
new file mode 100644 (file)
index 0000000..53af6fe
--- /dev/null
@@ -0,0 +1,607 @@
+;
+; jiss2red.asm - reduced-size IDCT (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size
+; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef IDCT_SCALING_SUPPORTED
+%ifdef JIDCT_INT_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS     13      ; the F_* constants below are scaled by 2^CONST_BITS
+%define PASS1_BITS     2       ; pass-1 results keep this many extra scale bits
+
+%define DESCALE_P1_4   (CONST_BITS-PASS1_BITS+1)       ; 4x4: right shift ending pass 1
+%define DESCALE_P2_4   (CONST_BITS+PASS1_BITS+3+1)     ; 4x4: right shift ending pass 2
+%define DESCALE_P1_2   (CONST_BITS-PASS1_BITS+2)       ; 2x2: right shift ending pass 1
+%define DESCALE_P2_2   (CONST_BITS+PASS1_BITS+3+2)     ; 2x2: right shift ending pass 2
+
+%if CONST_BITS == 13
+F_0_211        equ      1730           ; FIX(0.211164243)
+F_0_509        equ      4176           ; FIX(0.509795579)
+F_0_601        equ      4926           ; FIX(0.601344887)
+F_0_720        equ      5906           ; FIX(0.720959822)
+F_0_765        equ      6270           ; FIX(0.765366865)
+F_0_850        equ      6967           ; FIX(0.850430095)
+F_0_899        equ      7373           ; FIX(0.899976223)
+F_1_061        equ      8697           ; FIX(1.061594337)
+F_1_272        equ     10426           ; FIX(1.272758580)
+F_1_451        equ     11893           ; FIX(1.451774981)
+F_1_847        equ     15137           ; FIX(1.847759065)
+F_2_172        equ     17799           ; FIX(2.172734803)
+F_2_562        equ     20995           ; FIX(2.562915447)
+F_3_624        equ     29692           ; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; rounding right shift; literals below are the constants scaled by 2^30
+F_0_211        equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509        equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601        equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720        equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765        equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850        equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899        equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061        equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272        equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451        equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847        equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172        equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562        equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624        equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
+%endif
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+       global  EXTN(jconst_idct_red_sse2)
+
+EXTN(jconst_idct_red_sse2):
+
+PW_F184_MF076  times 4 dw  F_1_847,-F_0_765    ; 4x4 even part: pmaddwd with (2,6) pairs -> tmp2
+PW_F256_F089   times 4 dw  F_2_562, F_0_899    ; 4x4 odd part:  pmaddwd with (1,3) pairs -> tmp2 term
+PW_F106_MF217  times 4 dw  F_1_061,-F_2_172    ; 4x4 odd part:  pmaddwd with (1,3) pairs -> tmp0 term
+PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509    ; 4x4 odd part:  pmaddwd with (5,7) pairs -> tmp2 term
+PW_F145_MF021  times 4 dw  F_1_451,-F_0_211    ; 4x4 odd part:  pmaddwd with (5,7) pairs -> tmp0 term
+PW_F362_MF127  times 4 dw  F_3_624,-F_1_272    ; 2x2 odd part:  pmaddwd with (1,3) pairs
+PW_F085_MF072  times 4 dw  F_0_850,-F_0_720    ; 2x2 odd part:  pmaddwd with (5,7) pairs
+PD_DESCALE_P1_4        times 4 dd  1 << (DESCALE_P1_4-1)       ; rounding bias added before psrad DESCALE_P1_4
+PD_DESCALE_P2_4        times 4 dd  1 << (DESCALE_P2_4-1)       ; rounding bias added before psrad DESCALE_P2_4
+PD_DESCALE_P1_2        times 4 dd  1 << (DESCALE_P1_2-1)       ; rounding bias added before psrad DESCALE_P1_2
+PD_DESCALE_P2_2        times 4 dd  1 << (DESCALE_P2_2-1)       ; rounding bias added before psrad DESCALE_P2_2
+PB_CENTERJSAMP times 16 db CENTERJSAMPLE       ; added (paddb) to the packed output bytes
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 4x4 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_4x4_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                     JCOEFPTR coef_block,
+;                     JSAMPARRAY output_buf, JDIMENSION output_col)
+; Note: row 4 of the coefficient block is never read by this routine.
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+%define original_ebp   ebp+0           ; [ebp] holds the unaligned frame address saved by the prologue
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         2               ; number of xmmword work slots reserved below ebp
+
+       align   16
+       global  EXTN(jpeg_idct_4x4_sse2)
+
+EXTN(jpeg_idct_4x4_sse2):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp (unaligned frame base; args are at eax+8..)
+       sub     esp, byte 4                     ; make room for the saved frame address
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax                       ; stored at [aligned ebp]; the epilogue's "pop esp" reloads it
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [wk(0)]                    ; reserve wk[0..WK_NUM-1] below the aligned ebp
+       pushpic ebx
+;      push    ecx             ; unused
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input.
+
+;      mov     eax, [original_ebp]
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]      ; cheap early-out on one dword each of rows 1 and 2
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]      ; (exact DWBLOCK addressing: see its macro definition)
+       jnz     short .columnDCT
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]  ; OR together rows 1,2,3,5,6,7;
+       movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]  ; row 4 is skipped because nothing below reads it
+       por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       por     xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       por     xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       por     xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     xmm0,xmm1
+       packsswb xmm0,xmm0      ; saturating pack: a nonzero word stays a nonzero byte
+       packsswb xmm0,xmm0      ; after the 2nd pack the low dword summarizes all 8 words
+       movd    eax,xmm0
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       psllw   xmm0,PASS1_BITS         ; DC-only shortcut: each column is the dequantized DC << PASS1_BITS
+
+       movdqa    xmm3,xmm0     ; xmm0=in0=(00 01 02 03 04 05 06 07)
+       punpcklwd xmm0,xmm0     ; xmm0=(00 00 01 01 02 02 03 03)
+       punpckhwd xmm3,xmm3     ; xmm3=(04 04 05 05 06 06 07 07)
+
+       pshufd  xmm1,xmm0,0x50  ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
+       pshufd  xmm0,xmm0,0xFA  ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
+       pshufd  xmm6,xmm3,0x50  ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
+       pshufd  xmm3,xmm3,0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
+
+       jmp     near .column_end
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Odd part
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       movdqa    xmm4,xmm0
+       movdqa    xmm5,xmm0
+       punpcklwd xmm4,xmm1             ; interleave rows 1 and 3 so pmaddwd forms c0*in1+c1*in3
+       punpckhwd xmm5,xmm1
+       movdqa    xmm0,xmm4
+       movdqa    xmm1,xmm5
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_F256_F089)]       ; xmm4=(tmp2L)
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F256_F089)]       ; xmm5=(tmp2H)
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm0=(tmp0L)
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm1=(tmp0H)
+
+       movdqa    xmm6,xmm2
+       movdqa    xmm7,xmm2
+       punpcklwd xmm6,xmm3             ; interleave rows 5 and 7 the same way
+       punpckhwd xmm7,xmm3
+       movdqa    xmm2,xmm6
+       movdqa    xmm3,xmm7
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm6=(tmp2L)
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm7=(tmp2H)
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm2=(tmp0L)
+       pmaddwd   xmm3,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm3=(tmp0H)
+
+       paddd   xmm6,xmm4               ; xmm6=tmp2L
+       paddd   xmm7,xmm5               ; xmm7=tmp2H
+       paddd   xmm2,xmm0               ; xmm2=tmp0L
+       paddd   xmm3,xmm1               ; xmm3=tmp0H
+
+       movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp0L
+       movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=tmp0H
+
+       ; -- Even part
+
+       movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       pxor      xmm1,xmm1
+       pxor      xmm2,xmm2
+       punpcklwd xmm1,xmm4             ; xmm1=tmp0L
+       punpckhwd xmm2,xmm4             ; xmm2=tmp0H
+       psrad     xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
+       psrad     xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
+
+       movdqa    xmm3,xmm5             ; xmm5=in2=z2
+       punpcklwd xmm5,xmm0             ; xmm0=in6=z3
+       punpckhwd xmm3,xmm0
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm5=tmp2L
+       pmaddwd   xmm3,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm3=tmp2H
+
+       movdqa  xmm4,xmm1
+       movdqa  xmm0,xmm2
+       paddd   xmm1,xmm5               ; xmm1=tmp10L
+       paddd   xmm2,xmm3               ; xmm2=tmp10H
+       psubd   xmm4,xmm5               ; xmm4=tmp12L
+       psubd   xmm0,xmm3               ; xmm0=tmp12H
+
+       ; -- Final output stage
+
+       movdqa  xmm5,xmm1
+       movdqa  xmm3,xmm2
+       paddd   xmm1,xmm6               ; xmm1=data0L
+       paddd   xmm2,xmm7               ; xmm2=data0H
+       psubd   xmm5,xmm6               ; xmm5=data3L
+       psubd   xmm3,xmm7               ; xmm3=data3H
+
+       movdqa  xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]      ; xmm6=[PD_DESCALE_P1_4]
+
+       paddd   xmm1,xmm6               ; add rounding bias, then shift: rounded descale
+       paddd   xmm2,xmm6
+       psrad   xmm1,DESCALE_P1_4
+       psrad   xmm2,DESCALE_P1_4
+       paddd   xmm5,xmm6
+       paddd   xmm3,xmm6
+       psrad   xmm5,DESCALE_P1_4
+       psrad   xmm3,DESCALE_P1_4
+
+       packssdw  xmm1,xmm2             ; xmm1=data0=(00 01 02 03 04 05 06 07)
+       packssdw  xmm5,xmm3             ; xmm5=data3=(30 31 32 33 34 35 36 37)
+
+       movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp0L
+       movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp0H
+
+       movdqa  xmm2,xmm4
+       movdqa  xmm3,xmm0
+       paddd   xmm4,xmm7               ; xmm4=data1L
+       paddd   xmm0,xmm6               ; xmm0=data1H
+       psubd   xmm2,xmm7               ; xmm2=data2L
+       psubd   xmm3,xmm6               ; xmm3=data2H
+
+       movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]      ; xmm7=[PD_DESCALE_P1_4]
+
+       paddd   xmm4,xmm7
+       paddd   xmm0,xmm7
+       psrad   xmm4,DESCALE_P1_4
+       psrad   xmm0,DESCALE_P1_4
+       paddd   xmm2,xmm7
+       paddd   xmm3,xmm7
+       psrad   xmm2,DESCALE_P1_4
+       psrad   xmm3,DESCALE_P1_4
+
+       packssdw  xmm4,xmm0             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+       packssdw  xmm2,xmm3             ; xmm2=data2=(20 21 22 23 24 25 26 27)
+
+       movdqa    xmm6,xmm1     ; transpose coefficients(phase 1)
+       punpcklwd xmm1,xmm4     ; xmm1=(00 10 01 11 02 12 03 13)
+       punpckhwd xmm6,xmm4     ; xmm6=(04 14 05 15 06 16 07 17)
+       movdqa    xmm7,xmm2     ; transpose coefficients(phase 1)
+       punpcklwd xmm2,xmm5     ; xmm2=(20 30 21 31 22 32 23 33)
+       punpckhwd xmm7,xmm5     ; xmm7=(24 34 25 35 26 36 27 37)
+
+       movdqa    xmm0,xmm1     ; transpose coefficients(phase 2)
+       punpckldq xmm1,xmm2     ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
+       punpckhdq xmm0,xmm2     ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
+       movdqa    xmm3,xmm6     ; transpose coefficients(phase 2)
+       punpckldq xmm6,xmm7     ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
+       punpckhdq xmm3,xmm7     ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
+.column_end:
+
+       ; -- Prefetch the next coefficient block
+
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+       ; ---- Pass 2: process rows, store into output array.
+
+       mov     eax, [original_ebp]     ; reload frame address (eax is overwritten by the zero-column test)
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]       ; eax = output_col, used as offset into each row
+
+       ; -- Even part
+
+       pxor      xmm4,xmm4
+       punpcklwd xmm4,xmm1             ; xmm4=tmp0
+       psrad     xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
+
+       ; -- Odd part
+
+       punpckhwd xmm1,xmm0             ; pair col1 with col3
+       punpckhwd xmm6,xmm3             ; pair col5 with col7 (col4 in the low half of xmm6 is unused)
+       movdqa    xmm5,xmm1
+       movdqa    xmm2,xmm6
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_F256_F089)]       ; xmm1=(tmp2)
+       pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm6=(tmp2)
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm5=(tmp0)
+       pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm2=(tmp0)
+
+       paddd     xmm6,xmm1             ; xmm6=tmp2
+       paddd     xmm2,xmm5             ; xmm2=tmp0
+
+       ; -- Even part
+
+       punpcklwd xmm0,xmm3             ; pair col2 with col6
+       pmaddwd   xmm0,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm0=tmp2
+
+       movdqa    xmm7,xmm4
+       paddd     xmm4,xmm0             ; xmm4=tmp10
+       psubd     xmm7,xmm0             ; xmm7=tmp12
+
+       ; -- Final output stage
+
+       movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)]      ; xmm1=[PD_DESCALE_P2_4]
+
+       movdqa  xmm5,xmm4
+       movdqa  xmm3,xmm7
+       paddd   xmm4,xmm6               ; xmm4=data0=(00 10 20 30)
+       paddd   xmm7,xmm2               ; xmm7=data1=(01 11 21 31)
+       psubd   xmm5,xmm6               ; xmm5=data3=(03 13 23 33)
+       psubd   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
+
+       paddd   xmm4,xmm1
+       paddd   xmm7,xmm1
+       psrad   xmm4,DESCALE_P2_4
+       psrad   xmm7,DESCALE_P2_4
+       paddd   xmm5,xmm1
+       paddd   xmm3,xmm1
+       psrad   xmm5,DESCALE_P2_4
+       psrad   xmm3,DESCALE_P2_4
+
+       packssdw  xmm4,xmm3             ; xmm4=(00 10 20 30 02 12 22 32)
+       packssdw  xmm7,xmm5             ; xmm7=(01 11 21 31 03 13 23 33)
+
+       movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+       punpcklwd xmm4,xmm7             ; xmm4=(00 01 10 11 20 21 30 31)
+       punpckhwd xmm0,xmm7             ; xmm0=(02 03 12 13 22 23 32 33)
+
+       movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+       punpckldq xmm4,xmm0             ; xmm4=(00 01 02 03 10 11 12 13)
+       punpckhdq xmm6,xmm0             ; xmm6=(20 21 22 23 30 31 32 33)
+
+       packsswb  xmm4,xmm6             ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
+       paddb     xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)]
+
+       pshufd    xmm2,xmm4,0x39        ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
+       pshufd    xmm1,xmm4,0x4E        ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
+       pshufd    xmm3,xmm4,0x93        ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       movd    _DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4   ; output row 0: 4 samples
+       movd    _DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2   ; output row 1: 4 samples
+       mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movd    _DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1   ; output row 2: 4 samples
+       movd    _DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3   ; output row 3: 4 samples
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; unused
+       poppic  ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_2x2_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                     JCOEFPTR coef_block,
+;                     JSAMPARRAY output_buf, JDIMENSION output_col)
+; Note: only coefficient rows and columns 0, 1, 3, 5 and 7 contribute (see diagrams).
+
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+       align   16
+       global  EXTN(jpeg_idct_2x2_sse2)
+
+EXTN(jpeg_idct_2x2_sse2):
+       push    ebp
+       mov     ebp,esp
+       push    ebx             ; saved unconditionally: ebx is overwritten by pextrw near the end
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input.
+
+       mov     edx, POINTER [compptr(ebp)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+
+       ; | input:                  | result:        |
+       ; | 00 01 ** 03 ** 05 ** 07 |                |
+       ; | 10 11 ** 13 ** 15 ** 17 |                |
+       ; | ** ** ** ** ** ** ** ** |                |
+       ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+       ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+       ; | 50 51 ** 53 ** 55 ** 57 |                |
+       ; | ** ** ** ** ** ** ** ** |                |
+       ; | 70 71 ** 73 ** 75 ** 77 |                |
+
+       ; -- Odd part
+
+       movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+       pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
+       ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
+
+       pcmpeqd   xmm7,xmm7             ; xmm7 = all ones
+       pslld     xmm7,WORD_BIT         ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
+
+       movdqa    xmm4,xmm0             ; xmm4=(10 11 ** 13 ** 15 ** 17)
+       movdqa    xmm5,xmm2             ; xmm5=(50 51 ** 53 ** 55 ** 57)
+       punpcklwd xmm4,xmm1             ; xmm4=(10 30 11 31 ** ** 13 33)
+       punpcklwd xmm5,xmm3             ; xmm5=(50 70 51 71 ** ** 53 73)
+       pmaddwd   xmm4,[GOTOFF(ebx,PW_F362_MF127)]
+       pmaddwd   xmm5,[GOTOFF(ebx,PW_F085_MF072)]
+
+       psrld   xmm0,WORD_BIT           ; xmm0=(11 -- 13 -- 15 -- 17 --)
+       pand    xmm1,xmm7               ; xmm1=(-- 31 -- 33 -- 35 -- 37)
+       psrld   xmm2,WORD_BIT           ; xmm2=(51 -- 53 -- 55 -- 57 --)
+       pand    xmm3,xmm7               ; xmm3=(-- 71 -- 73 -- 75 -- 77)
+       por     xmm0,xmm1               ; xmm0=(11 31 13 33 15 35 17 37)
+       por     xmm2,xmm3               ; xmm2=(51 71 53 73 55 75 57 77)
+       pmaddwd xmm0,[GOTOFF(ebx,PW_F362_MF127)]
+       pmaddwd xmm2,[GOTOFF(ebx,PW_F085_MF072)]
+
+       paddd   xmm4,xmm5               ; xmm4=tmp0[col0 col1 **** col3]
+       paddd   xmm0,xmm2               ; xmm0=tmp0[col1 col3 col5 col7]
+
+       ; -- Even part
+
+       movdqa  xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       pmullw  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+       ; xmm6=(00 01 ** 03 ** 05 ** 07)
+
+       movdqa  xmm1,xmm6               ; xmm1=(00 01 ** 03 ** 05 ** 07)
+       pslld   xmm6,WORD_BIT           ; xmm6=(-- 00 -- ** -- ** -- **)
+       pand    xmm1,xmm7               ; xmm1=(-- 01 -- 03 -- 05 -- 07)
+       psrad   xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
+       psrad   xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
+
+       ; -- Final output stage
+
+       movdqa  xmm3,xmm6
+       movdqa  xmm5,xmm1
+       paddd   xmm6,xmm4       ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
+       paddd   xmm1,xmm0       ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
+       psubd   xmm3,xmm4       ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
+       psubd   xmm5,xmm0       ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
+
+       movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)]      ; xmm2=[PD_DESCALE_P1_2]
+
+       punpckldq  xmm6,xmm3            ; xmm6=(A0 B0 ** **)
+
+       movdqa     xmm7,xmm1
+       punpcklqdq xmm1,xmm5            ; xmm1=(A1 A3 B1 B3)
+       punpckhqdq xmm7,xmm5            ; xmm7=(A5 A7 B5 B7)
+
+       paddd   xmm6,xmm2               ; add rounding bias, then shift: rounded descale
+       psrad   xmm6,DESCALE_P1_2
+
+       paddd   xmm1,xmm2
+       paddd   xmm7,xmm2
+       psrad   xmm1,DESCALE_P1_2
+       psrad   xmm7,DESCALE_P1_2
+
+       ; -- Prefetch the next coefficient block
+
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+       prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+       ; ---- Pass 2: process rows, store into output array.
+
+       mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(ebp)]       ; eax = output_col, used as offset into each row
+
+       ; | input:| result:|
+       ; | A0 B0 |        |
+       ; | A1 B1 | C0 C1  |
+       ; | A3 B3 | D0 D1  |
+       ; | A5 B5 |        |
+       ; | A7 B7 |        |
+
+       ; -- Odd part
+
+       packssdw  xmm1,xmm1             ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
+       packssdw  xmm7,xmm7             ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
+       pmaddwd   xmm1,[GOTOFF(ebx,PW_F362_MF127)]
+       pmaddwd   xmm7,[GOTOFF(ebx,PW_F085_MF072)]
+
+       paddd     xmm1,xmm7             ; xmm1=tmp0[row0 row1 row0 row1]
+
+       ; -- Even part
+
+       pslld     xmm6,(CONST_BITS+2)   ; xmm6=tmp10[row0 row1 **** ****]
+
+       ; -- Final output stage
+
+       movdqa    xmm4,xmm6
+       paddd     xmm6,xmm1     ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
+       psubd     xmm4,xmm1     ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
+
+       punpckldq xmm6,xmm4     ; xmm6=(C0 D0 C1 D1)
+
+       paddd     xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)]
+       psrad     xmm6,DESCALE_P2_2
+
+       packssdw  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
+       packsswb  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
+       paddb     xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; last use of ebx as the GOT pointer
+
+       pextrw  ebx,xmm6,0x00           ; ebx=(C0 D0 -- --)
+       pextrw  ecx,xmm6,0x01           ; ecx=(C1 D1 -- --)
+
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       mov     WORD [edx+eax*SIZEOF_JSAMPLE], bx       ; output row 0: 2 samples
+       mov     WORD [esi+eax*SIZEOF_JSAMPLE], cx       ; output row 1: 2 samples
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       pop     ebp
+       ret
+
+%endif ; JIDCT_INT_SSE2_SUPPORTED
+%endif ; IDCT_SCALING_SUPPORTED
diff --git a/jisseflt.asm b/jisseflt.asm
new file mode 100644 (file)
index 0000000..20eaeeb
--- /dev/null
@@ -0,0 +1,582 @@
+;
+; jisseflt.asm - floating-point IDCT (SSE & MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see jidctflt.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JIDCT_FLT_SSE_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; unpcklps2 dst,src : combine the low halves (two floats each) of dst and src.
+; shufps selector 0x44 = (1,0,1,0): result elements 0-1 come from dst[0..1],
+; result elements 2-3 come from src[0..1].
+%macro unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+       shufps  %1,%2,0x44
+%endmacro
+
+; unpckhps2 dst,src : combine the high halves (two floats each) of dst and src.
+; shufps selector 0xEE = (3,2,3,2): result elements 0-1 come from dst[2..3],
+; result elements 2-3 come from src[2..3].
+%macro unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+       shufps  %1,%2,0xEE
+%endmacro
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+; Constant table for jpeg_idct_float_sse.  Each PD_* entry repeats one
+; single-precision value four times so it can be used directly as the
+; memory operand of a packed mulps/movaps.
+; NOTE(review): alignz presumably pads to a 16-byte boundary (macro is
+; defined outside this file) -- the PD_* entries are used as aligned
+; SSE memory operands below.
+       alignz  16
+       global  EXTN(jconst_idct_float_sse)
+
+EXTN(jconst_idct_float_sse):
+
+PD_1_414       times 4 dd  1.414213562373095048801689  ; sqrt(2)
+PD_1_847       times 4 dd  1.847759065022573512256366  ; multiplier giving z5
+PD_1_082       times 4 dd  1.082392200292393968799446  ; multiplier applied to z12
+PD_M2_613      times 4 dd -2.613125929752753055713286  ; multiplier applied to z10
+PD_0_125       times 4 dd  0.125       ; 1/8
+PB_CENTERJSAMP times 8 db  CENTERJSAMPLE       ; byte bias added to the packed output samples
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jpeg_idct_float_sse (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                      JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+; Argument offsets.  'b' is the stack pointer value taken right after the
+; initial "push ebp" (saved ebp at +0, return address at +4, so the first
+; argument is at +8).
+%define cinfo(b)       (b)+8           ; j_decompress_ptr cinfo
+%define compptr(b)     (b)+12          ; jpeg_component_info * compptr
+%define coef_block(b)  (b)+16          ; JCOEFPTR coef_block
+%define output_buf(b)  (b)+20          ; JSAMPARRAY output_buf
+%define output_col(b)  (b)+24          ; JDIMENSION output_col
+
+; Locals, addressed relative to the realigned ebp: the slot at ebp+0 holds
+; the pre-alignment stack pointer, wk[] lies immediately below ebp, and
+; workspace[] lies immediately below wk[0].
+%define original_ebp   ebp+0
+%define wk(i)          ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM         2
+%define workspace      wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                       ; FAST_FLOAT workspace[DCTSIZE2]
+
+; Stack frame: esp is realigned to a 16-byte boundary so that wk[] and
+; workspace[] (both accessed with movaps) sit on aligned addresses.  The
+; pre-alignment stack pointer is kept at [original_ebp] and is the base
+; from which the C arguments are read.  Pass 1 uses MMX to widen the
+; 16-bit coefficients and SSE for the arithmetic; pass 2 uses SSE for the
+; arithmetic and MMX to pack and transpose the output bytes.
+       align   16
+       global  EXTN(jpeg_idct_float_sse)
+
+EXTN(jpeg_idct_float_sse):
+       push    ebp
+       mov     eax,esp                         ; eax = original ebp
+       sub     esp, byte 4                     ; room for the saved pointer
+       and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+       mov     [esp],eax                       ; becomes [original_ebp]
+       mov     ebp,esp                         ; ebp = aligned ebp
+       lea     esp, [workspace]                ; reserve wk[] and workspace[]
+       push    ebx
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+       push    esi
+       push    edi
+
+       get_GOT ebx             ; get GOT address
+
+       ; ---- Pass 1: process columns from input, store into work array.
+       ; Each loop iteration handles four columns (ctr = DCTSIZE/4).
+
+       ; eax is expected to still hold the value stored at [original_ebp],
+       ; which is why the reload below is commented out.
+;      mov     eax, [original_ebp]
+       mov     edx, POINTER [compptr(eax)]
+       mov     edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr
+       mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+       lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
+       ; Shortcut: if every AC coefficient of these four columns is zero,
+       ; the column IDCT reduces to replicating the dequantized DC term.
+       ; Quick reject first on one dword each from blocks 1 and 2.
+       mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       jnz     near .columnDCT
+
+       ; Full test: OR together blocks 1..7 of the four columns.
+       movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+       por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+       por     mm1,mm0
+       packsswb mm1,mm1        ; saturating pack: nonzero words stay nonzero
+       movd    eax,mm1
+       test    eax,eax
+       jnz     short .columnDCT
+
+       ; -- AC terms all zero
+
+       movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+
+       punpckhwd mm1,mm0                       ; mm1=(** 02 ** 03)
+       punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+       psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in0H=(02 03)
+       psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0L=(00 01)
+       cvtpi2ps  xmm3,mm1                      ; xmm3=(02 03 ** **)
+       cvtpi2ps  xmm0,mm0                      ; xmm0=(00 01 ** **)
+       movlhps   xmm0,xmm3                     ; xmm0=in0=(00 01 02 03)
+
+       mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movaps  xmm1,xmm0
+       movaps  xmm2,xmm0
+       movaps  xmm3,xmm0
+
+       shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+       shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+       shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+       shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
+
+       ; Each broadcast value fills both halves of one workspace row.
+       movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+       movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+       movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+       movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+       movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+       movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+       movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+       movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+       jmp     near .nextcolumn
+       alignx  16,7
+%endif
+.columnDCT:
+
+       ; -- Even part
+
+       movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+       movq      mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+       movq      mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+       movq      mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+       ; Sign-extend words to dwords: place each word in the upper half of
+       ; a dword (punpck), then arithmetic-shift right by 16 (psrad).
+       punpckhwd mm4,mm0                       ; mm4=(** 02 ** 03)
+       punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+       punpckhwd mm5,mm1                       ; mm5=(** 22 ** 23)
+       punpcklwd mm1,mm1                       ; mm1=(20 20 21 21)
+
+       psrad     mm4,(DWORD_BIT-WORD_BIT)      ; mm4=in0H=(02 03)
+       psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0L=(00 01)
+       cvtpi2ps  xmm4,mm4                      ; xmm4=(02 03 ** **)
+       cvtpi2ps  xmm0,mm0                      ; xmm0=(00 01 ** **)
+       psrad     mm5,(DWORD_BIT-WORD_BIT)      ; mm5=in2H=(22 23)
+       psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in2L=(20 21)
+       cvtpi2ps  xmm5,mm5                      ; xmm5=(22 23 ** **)
+       cvtpi2ps  xmm1,mm1                      ; xmm1=(20 21 ** **)
+
+       punpckhwd mm6,mm2                       ; mm6=(** 42 ** 43)
+       punpcklwd mm2,mm2                       ; mm2=(40 40 41 41)
+       punpckhwd mm7,mm3                       ; mm7=(** 62 ** 63)
+       punpcklwd mm3,mm3                       ; mm3=(60 60 61 61)
+
+       psrad     mm6,(DWORD_BIT-WORD_BIT)      ; mm6=in4H=(42 43)
+       psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in4L=(40 41)
+       cvtpi2ps  xmm6,mm6                      ; xmm6=(42 43 ** **)
+       cvtpi2ps  xmm2,mm2                      ; xmm2=(40 41 ** **)
+       psrad     mm7,(DWORD_BIT-WORD_BIT)      ; mm7=in6H=(62 63)
+       psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in6L=(60 61)
+       cvtpi2ps  xmm7,mm7                      ; xmm7=(62 63 ** **)
+       cvtpi2ps  xmm3,mm3                      ; xmm3=(60 61 ** **)
+
+       ; Dequantize: multiply by the matching quantptr entries.
+       movlhps   xmm0,xmm4                     ; xmm0=in0=(00 01 02 03)
+       movlhps   xmm1,xmm5                     ; xmm1=in2=(20 21 22 23)
+       mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movlhps   xmm2,xmm6                     ; xmm2=in4=(40 41 42 43)
+       movlhps   xmm3,xmm7                     ; xmm3=in6=(60 61 62 63)
+       mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movaps  xmm4,xmm0
+       movaps  xmm5,xmm1
+       subps   xmm0,xmm2               ; xmm0=tmp11
+       subps   xmm1,xmm3
+       addps   xmm4,xmm2               ; xmm4=tmp10
+       addps   xmm5,xmm3               ; xmm5=tmp13
+
+       mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+       subps   xmm1,xmm5               ; xmm1=tmp12
+
+       movaps  xmm6,xmm4
+       movaps  xmm7,xmm0
+       subps   xmm4,xmm5               ; xmm4=tmp3
+       subps   xmm0,xmm1               ; xmm0=tmp2
+       addps   xmm6,xmm5               ; xmm6=tmp0
+       addps   xmm7,xmm1               ; xmm7=tmp1
+
+       ; Spill tmp2/tmp3: all eight xmm registers are needed by the odd part.
+       movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+       movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+       ; -- Odd part
+
+       movq      mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+       movq      mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+       movq      mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+       movq      mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+       punpckhwd mm6,mm4                       ; mm6=(** 12 ** 13)
+       punpcklwd mm4,mm4                       ; mm4=(10 10 11 11)
+       punpckhwd mm2,mm0                       ; mm2=(** 32 ** 33)
+       punpcklwd mm0,mm0                       ; mm0=(30 30 31 31)
+
+       psrad     mm6,(DWORD_BIT-WORD_BIT)      ; mm6=in1H=(12 13)
+       psrad     mm4,(DWORD_BIT-WORD_BIT)      ; mm4=in1L=(10 11)
+       cvtpi2ps  xmm4,mm6                      ; xmm4=(12 13 ** **)
+       cvtpi2ps  xmm2,mm4                      ; xmm2=(10 11 ** **)
+       psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in3H=(32 33)
+       psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in3L=(30 31)
+       cvtpi2ps  xmm0,mm2                      ; xmm0=(32 33 ** **)
+       cvtpi2ps  xmm3,mm0                      ; xmm3=(30 31 ** **)
+
+       punpckhwd mm7,mm5                       ; mm7=(** 52 ** 53)
+       punpcklwd mm5,mm5                       ; mm5=(50 50 51 51)
+       punpckhwd mm3,mm1                       ; mm3=(** 72 ** 73)
+       punpcklwd mm1,mm1                       ; mm1=(70 70 71 71)
+
+       movlhps   xmm2,xmm4                     ; xmm2=in1=(10 11 12 13)
+       movlhps   xmm3,xmm0                     ; xmm3=in3=(30 31 32 33)
+
+       psrad     mm7,(DWORD_BIT-WORD_BIT)      ; mm7=in5H=(52 53)
+       psrad     mm5,(DWORD_BIT-WORD_BIT)      ; mm5=in5L=(50 51)
+       cvtpi2ps  xmm4,mm7                      ; xmm4=(52 53 ** **)
+       cvtpi2ps  xmm5,mm5                      ; xmm5=(50 51 ** **)
+       psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in7H=(72 73)
+       psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in7L=(70 71)
+       cvtpi2ps  xmm0,mm3                      ; xmm0=(72 73 ** **)
+       cvtpi2ps  xmm1,mm1                      ; xmm1=(70 71 ** **)
+
+       mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movlhps   xmm5,xmm4                     ; xmm5=in5=(50 51 52 53)
+       movlhps   xmm1,xmm0                     ; xmm1=in7=(70 71 72 73)
+       mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+       mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+       movaps  xmm4,xmm2
+       movaps  xmm0,xmm5
+       addps   xmm2,xmm1               ; xmm2=z11
+       addps   xmm5,xmm3               ; xmm5=z13
+       subps   xmm4,xmm1               ; xmm4=z12
+       subps   xmm0,xmm3               ; xmm0=z10
+
+       movaps  xmm1,xmm2
+       subps   xmm2,xmm5
+       addps   xmm1,xmm5               ; xmm1=tmp7
+
+       mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+       movaps  xmm3,xmm0
+       addps   xmm0,xmm4
+       mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+       mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+       mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+       addps   xmm3,xmm0               ; xmm3=tmp12
+       subps   xmm4,xmm0               ; xmm4=tmp10
+
+       ; -- Final output stage
+
+       subps   xmm3,xmm1               ; xmm3=tmp6
+       movaps  xmm5,xmm6
+       movaps  xmm0,xmm7
+       addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+       addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+       subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+       subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+       subps   xmm2,xmm3               ; xmm2=tmp5
+
+       movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+       unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+       unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+       movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+       unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+       unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
+
+       ; Swap: reload tmp2/tmp3 and park the half-transposed data6/7 in wk[].
+       movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+       movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
+
+       movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+       movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
+
+       addps   xmm4,xmm2               ; xmm4=tmp4
+       movaps  xmm0,xmm7
+       movaps  xmm3,xmm5
+       addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+       addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+       subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+       subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
+
+       movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+       unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+       unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+       movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+       unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+       unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
+
+       movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+       unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+       unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+       movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+       unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+       unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
+
+       movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+       movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
+
+       movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
+       movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+       movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+       movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+
+       movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+       unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+       unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+       movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+       unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+       unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
+
+       movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
+       movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+       movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
+       movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+
+.nextcolumn:
+       add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+       add     edx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+       add     edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+       dec     ecx                                     ; ctr
+       jnz     near .columnloop
+
+       ; -- Prefetch the next coefficient block
+
+       ; esi has advanced 8 coefficients past coef_block by now, so
+       ; esi+(DCTSIZE2-8)*SIZEOF_JCOEF is the first byte after this block.
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+       prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+       ; ---- Pass 2: process rows from work array, store into output array.
+       ; Each loop iteration produces four output rows (ctr = DCTSIZE/4).
+
+       mov     eax, [original_ebp]
+       lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+       mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+       mov     eax, JDIMENSION [output_col(eax)]
+       mov     ecx, DCTSIZE/4                          ; ctr
+       alignx  16,7
+.rowloop:
+
+       ; -- Even part
+
+       movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+       movaps  xmm4,xmm0
+       movaps  xmm5,xmm1
+       subps   xmm0,xmm2               ; xmm0=tmp11
+       subps   xmm1,xmm3
+       addps   xmm4,xmm2               ; xmm4=tmp10
+       addps   xmm5,xmm3               ; xmm5=tmp13
+
+       mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+       subps   xmm1,xmm5               ; xmm1=tmp12
+
+       movaps  xmm6,xmm4
+       movaps  xmm7,xmm0
+       subps   xmm4,xmm5               ; xmm4=tmp3
+       subps   xmm0,xmm1               ; xmm0=tmp2
+       addps   xmm6,xmm5               ; xmm6=tmp0
+       addps   xmm7,xmm1               ; xmm7=tmp1
+
+       movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+       movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+       ; -- Odd part
+
+       movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+       movaps  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+       movaps  xmm4,xmm2
+       movaps  xmm0,xmm5
+       addps   xmm2,xmm1               ; xmm2=z11
+       addps   xmm5,xmm3               ; xmm5=z13
+       subps   xmm4,xmm1               ; xmm4=z12
+       subps   xmm0,xmm3               ; xmm0=z10
+
+       movaps  xmm1,xmm2
+       subps   xmm2,xmm5
+       addps   xmm1,xmm5               ; xmm1=tmp7
+
+       mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+       movaps  xmm3,xmm0
+       addps   xmm0,xmm4
+       mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+       mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+       mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+       addps   xmm3,xmm0               ; xmm3=tmp12
+       subps   xmm4,xmm0               ; xmm4=tmp10
+
+       ; -- Final output stage
+
+       subps   xmm3,xmm1               ; xmm3=tmp6
+       movaps  xmm5,xmm6
+       movaps  xmm0,xmm7
+       addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+       addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+       subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+       subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+       subps   xmm2,xmm3               ; xmm2=tmp5
+
+       movaps  xmm1,[GOTOFF(ebx,PD_0_125)]     ; xmm1=[PD_0_125]
+
+       mulps   xmm6,xmm1               ; descale(1/8)
+       mulps   xmm7,xmm1               ; descale(1/8)
+       mulps   xmm5,xmm1               ; descale(1/8)
+       mulps   xmm0,xmm1               ; descale(1/8)
+
+       ; cvtps2pi converts only the low two floats, so the high halves are
+       ; first copied down with movhlps and converted separately.
+       movhlps   xmm3,xmm6
+       movhlps   xmm1,xmm7
+       cvtps2pi  mm0,xmm6              ; round to int32, mm0=data0L=(00 10)
+       cvtps2pi  mm1,xmm7              ; round to int32, mm1=data1L=(01 11)
+       cvtps2pi  mm2,xmm3              ; round to int32, mm2=data0H=(20 30)
+       cvtps2pi  mm3,xmm1              ; round to int32, mm3=data1H=(21 31)
+       packssdw  mm0,mm2               ; mm0=data0=(00 10 20 30)
+       packssdw  mm1,mm3               ; mm1=data1=(01 11 21 31)
+
+       movhlps   xmm6,xmm5
+       movhlps   xmm7,xmm0
+       cvtps2pi  mm4,xmm5              ; round to int32, mm4=data7L=(07 17)
+       cvtps2pi  mm5,xmm0              ; round to int32, mm5=data6L=(06 16)
+       cvtps2pi  mm6,xmm6              ; round to int32, mm6=data7H=(27 37)
+       cvtps2pi  mm7,xmm7              ; round to int32, mm7=data6H=(26 36)
+       packssdw  mm4,mm6               ; mm4=data7=(07 17 27 37)
+       packssdw  mm5,mm7               ; mm5=data6=(06 16 26 36)
+
+       packsswb  mm0,mm5               ; mm0=(00 10 20 30 06 16 26 36)
+       packsswb  mm1,mm4               ; mm1=(01 11 21 31 07 17 27 37)
+
+       movaps  xmm3, XMMWORD [wk(0)]   ; xmm3=tmp2
+       movaps  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
+
+       movaps  xmm6,[GOTOFF(ebx,PD_0_125)]     ; xmm6=[PD_0_125]
+
+       addps   xmm4,xmm2               ; xmm4=tmp4
+       movaps  xmm5,xmm3
+       movaps  xmm0,xmm1
+       addps   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
+       addps   xmm1,xmm4               ; xmm1=data4=(04 14 24 34)
+       subps   xmm5,xmm2               ; xmm5=data5=(05 15 25 35)
+       subps   xmm0,xmm4               ; xmm0=data3=(03 13 23 33)
+
+       mulps   xmm3,xmm6               ; descale(1/8)
+       mulps   xmm1,xmm6               ; descale(1/8)
+       mulps   xmm5,xmm6               ; descale(1/8)
+       mulps   xmm0,xmm6               ; descale(1/8)
+
+       movhlps   xmm7,xmm3
+       movhlps   xmm2,xmm1
+       cvtps2pi  mm2,xmm3              ; round to int32, mm2=data2L=(02 12)
+       cvtps2pi  mm3,xmm1              ; round to int32, mm3=data4L=(04 14)
+       cvtps2pi  mm6,xmm7              ; round to int32, mm6=data2H=(22 32)
+       cvtps2pi  mm7,xmm2              ; round to int32, mm7=data4H=(24 34)
+       packssdw  mm2,mm6               ; mm2=data2=(02 12 22 32)
+       packssdw  mm3,mm7               ; mm3=data4=(04 14 24 34)
+
+       movhlps   xmm4,xmm5
+       movhlps   xmm6,xmm0
+       cvtps2pi  mm5,xmm5              ; round to int32, mm5=data5L=(05 15)
+       cvtps2pi  mm4,xmm0              ; round to int32, mm4=data3L=(03 13)
+       cvtps2pi  mm6,xmm4              ; round to int32, mm6=data5H=(25 35)
+       cvtps2pi  mm7,xmm6              ; round to int32, mm7=data3H=(23 33)
+       packssdw  mm5,mm6               ; mm5=data5=(05 15 25 35)
+       packssdw  mm4,mm7               ; mm4=data3=(03 13 23 33)
+
+       movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm6=[PB_CENTERJSAMP]
+
+       packsswb  mm2,mm3               ; mm2=(02 12 22 32 04 14 24 34)
+       packsswb  mm4,mm5               ; mm4=(03 13 23 33 05 15 25 35)
+
+       ; The packs above saturated every value to a signed byte; adding
+       ; CENTERJSAMPLE (wrapping byte add) re-centers them as samples.
+       paddb     mm0,mm6
+       paddb     mm1,mm6
+       paddb     mm2,mm6
+       paddb     mm4,mm6
+
+       movq      mm7,mm0               ; transpose coefficients(phase 1)
+       punpcklbw mm0,mm1               ; mm0=(00 01 10 11 20 21 30 31)
+       punpckhbw mm7,mm1               ; mm7=(06 07 16 17 26 27 36 37)
+       movq      mm3,mm2               ; transpose coefficients(phase 1)
+       punpcklbw mm2,mm4               ; mm2=(02 03 12 13 22 23 32 33)
+       punpckhbw mm3,mm4               ; mm3=(04 05 14 15 24 25 34 35)
+
+       movq      mm5,mm0               ; transpose coefficients(phase 2)
+       punpcklwd mm0,mm2               ; mm0=(00 01 02 03 10 11 12 13)
+       punpckhwd mm5,mm2               ; mm5=(20 21 22 23 30 31 32 33)
+       movq      mm6,mm3               ; transpose coefficients(phase 2)
+       punpcklwd mm3,mm7               ; mm3=(04 05 06 07 14 15 16 17)
+       punpckhwd mm6,mm7               ; mm6=(24 25 26 27 34 35 36 37)
+
+       movq      mm1,mm0               ; transpose coefficients(phase 3)
+       punpckldq mm0,mm3               ; mm0=(00 01 02 03 04 05 06 07)
+       punpckhdq mm1,mm3               ; mm1=(10 11 12 13 14 15 16 17)
+       movq      mm4,mm5               ; transpose coefficients(phase 3)
+       punpckldq mm5,mm6               ; mm5=(20 21 22 23 24 25 26 27)
+       punpckhdq mm4,mm6               ; mm4=(30 31 32 33 34 35 36 37)
+
+       pushpic ebx                     ; save GOT address
+
+       ; ebx is borrowed as a second row pointer for the four row stores.
+       mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+       movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
+       movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
+       mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+       mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+       movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
+       movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
+
+       poppic  ebx                     ; restore GOT address
+
+       add     esi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+       add     edi, byte 4*SIZEOF_JSAMPROW
+       dec     ecx                             ; ctr
+       jnz     near .rowloop
+
+       emms            ; empty MMX state
+
+       pop     edi
+       pop     esi
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       mov     esp,ebp         ; esp <- aligned ebp
+       pop     esp             ; esp <- original ebp
+       pop     ebp
+       ret
+
+%endif ; JIDCT_FLT_SSE_MMX_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
index d801b322da05e0cd033159ba973676e4305101ef..e3149e50798e7829575f3fe3cf7f7610e1b930b4 100644 (file)
--- a/jmemmgr.c
+++ b/jmemmgr.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 27, 2004
+ * ---------------------------------------------------------------------
+ *
  * This file contains the JPEG system-independent memory management
  * routines.  This code is usable across a wide variety of machines; most
  * of the system dependencies have been isolated in a separate file.
@@ -51,27 +58,12 @@ extern char * getenv JPP((const char * name));
 
 
 /*
- * Many machines require storage alignment: longs must start on 4-byte
- * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
- * always returns pointers that are multiples of the worst-case alignment
- * requirement, and we had better do so too.
- * There isn't any really portable way to determine the worst-case alignment
- * requirement.  This module assumes that the alignment requirement is
- * multiples of sizeof(ALIGN_TYPE).
- * By default, we define ALIGN_TYPE as double.  This is necessary on some
- * workstations (where doubles really do need 8-byte alignment) and will work
- * fine on nearly everything.  If your machine has lesser alignment needs,
- * you can save a few bytes by making ALIGN_TYPE smaller.
- * The only place I know of where this will NOT work is certain Macintosh
- * 680x0 compilers that define double as a 10-byte IEEE extended float.
- * Doing 10-byte alignment is counterproductive because longwords won't be
- * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
- * such a compiler.
+ * SIMD Ext: Most SSE/SSE2 instructions require their memory operands to
+ * be aligned to a 16-byte boundary; if they are not, a general-protection
+ * exception (#GP) is generated.
  */
 
-#ifndef ALIGN_TYPE             /* so can override from jconfig.h */
-#define ALIGN_TYPE  double
-#endif
+#define ALIGN_SIZE  16         /* sizeof SSE/SSE2 register */
 
 
 /*
@@ -81,31 +73,24 @@ extern char * getenv JPP((const char * name));
  * header with a link to the next pool of the same class.
  * Small and large pool headers are identical except that the latter's
  * link pointer must be FAR on 80x86 machines.
- * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
- * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
- * of the alignment requirement of ALIGN_TYPE.
  */
 
-typedef union small_pool_struct * small_pool_ptr;
+typedef struct small_pool_struct * small_pool_ptr;
 
-typedef union small_pool_struct {
-  struct {
-    small_pool_ptr next;       /* next in list of pools */
-    size_t bytes_used;         /* how many bytes already used within pool */
-    size_t bytes_left;         /* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;            /* included in union to ensure alignment */
+typedef struct small_pool_struct {
+  small_pool_ptr next;         /* next in list of pools */
+  size_t bytes_used;           /* how many bytes already used within pool */
+  size_t bytes_left;           /* bytes still available in this pool */
+  char dummy[ALIGN_SIZE-1];
 } small_pool_hdr;
 
-typedef union large_pool_struct FAR * large_pool_ptr;
+typedef struct large_pool_struct FAR * large_pool_ptr;
 
-typedef union large_pool_struct {
-  struct {
-    large_pool_ptr next;       /* next in list of pools */
-    size_t bytes_used;         /* how many bytes already used within pool */
-    size_t bytes_left;         /* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;            /* included in union to ensure alignment */
+typedef struct large_pool_struct {
+  large_pool_ptr next;         /* next in list of pools */
+  size_t bytes_used;           /* how many bytes already used within pool */
+  size_t bytes_left;           /* bytes still available in this pool */
+  char dummy[ALIGN_SIZE-1];
 } large_pool_hdr;
 
 
@@ -197,16 +182,16 @@ print_mem_stats (j_common_ptr cinfo, int pool_id)
          pool_id, mem->total_space_allocated);
 
   for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
-       lhdr_ptr = lhdr_ptr->hdr.next) {
+       lhdr_ptr = lhdr_ptr->next) {
     fprintf(stderr, "  Large chunk used %ld\n",
-           (long) lhdr_ptr->hdr.bytes_used);
+           (long) lhdr_ptr->bytes_used);
   }
 
   for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
-       shdr_ptr = shdr_ptr->hdr.next) {
+       shdr_ptr = shdr_ptr->next) {
     fprintf(stderr, "  Small chunk used %ld free %ld\n",
-           (long) shdr_ptr->hdr.bytes_used,
-           (long) shdr_ptr->hdr.bytes_left);
+           (long) shdr_ptr->bytes_used,
+           (long) shdr_ptr->bytes_left);
   }
 }
 
@@ -266,10 +251,10 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
   if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
     out_of_memory(cinfo, 1);   /* request exceeds malloc's ability */
 
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  /* Round up the requested size to a multiple of ALIGN_SIZE */
+  odd_bytes = sizeofobject % ALIGN_SIZE;
   if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+    sizeofobject += ALIGN_SIZE - odd_bytes;
 
   /* See if space is available in any existing pool */
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
@@ -277,10 +262,10 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
   prev_hdr_ptr = NULL;
   hdr_ptr = mem->small_list[pool_id];
   while (hdr_ptr != NULL) {
-    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
+    if (hdr_ptr->bytes_left >= sizeofobject)
       break;                   /* found pool with enough space */
     prev_hdr_ptr = hdr_ptr;
-    hdr_ptr = hdr_ptr->hdr.next;
+    hdr_ptr = hdr_ptr->next;
   }
 
   /* Time to make a new pool? */
@@ -305,20 +290,20 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
     }
     mem->total_space_allocated += min_request + slop;
     /* Success, initialize the new pool header and add to end of list */
-    hdr_ptr->hdr.next = NULL;
-    hdr_ptr->hdr.bytes_used = 0;
-    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
+    hdr_ptr->next = NULL;
+    hdr_ptr->bytes_used = 0;
+    hdr_ptr->bytes_left = sizeofobject + slop;
     if (prev_hdr_ptr == NULL)  /* first pool in class? */
       mem->small_list[pool_id] = hdr_ptr;
     else
-      prev_hdr_ptr->hdr.next = hdr_ptr;
+      prev_hdr_ptr->next = hdr_ptr;
   }
 
   /* OK, allocate the object from the current pool */
-  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
-  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
-  hdr_ptr->hdr.bytes_used += sizeofobject;
-  hdr_ptr->hdr.bytes_left -= sizeofobject;
+  data_ptr = (char *) ((size_t) (hdr_ptr + 1) & -ALIGN_SIZE);
+  data_ptr += hdr_ptr->bytes_used; /* point to place for object */
+  hdr_ptr->bytes_used += sizeofobject;
+  hdr_ptr->bytes_left -= sizeofobject;
 
   return (void *) data_ptr;
 }
@@ -350,10 +335,10 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
   if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
     out_of_memory(cinfo, 3);   /* request exceeds malloc's ability */
 
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  /* Round up the requested size to a multiple of ALIGN_SIZE */
+  odd_bytes = sizeofobject % ALIGN_SIZE;
   if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+    sizeofobject += ALIGN_SIZE - odd_bytes;
 
   /* Always make a new pool */
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
@@ -366,15 +351,15 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
   mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
 
   /* Success, initialize the new pool header and add to list */
-  hdr_ptr->hdr.next = mem->large_list[pool_id];
+  hdr_ptr->next = mem->large_list[pool_id];
   /* We maintain space counts in each pool header for statistical purposes,
    * even though they are not needed for allocation.
    */
-  hdr_ptr->hdr.bytes_used = sizeofobject;
-  hdr_ptr->hdr.bytes_left = 0;
+  hdr_ptr->bytes_used = sizeofobject;
+  hdr_ptr->bytes_left = 0;
   mem->large_list[pool_id] = hdr_ptr;
 
-  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
+  return (void FAR *) ((size_t) (hdr_ptr + 1) & -ALIGN_SIZE);
 }
 
 
@@ -401,6 +386,12 @@ alloc_sarray (j_common_ptr cinfo, int pool_id,
   JSAMPROW workspace;
   JDIMENSION rowsperchunk, currow, i;
   long ltemp;
+  JDIMENSION odd_samples;
+
+  /* Round up the row bytes to a multiple of ALIGN_SIZE */
+  odd_samples = samplesperrow % (ALIGN_SIZE / SIZEOF(JSAMPLE));
+  if (odd_samples > 0)
+    samplesperrow += (ALIGN_SIZE / SIZEOF(JSAMPLE)) - odd_samples;
 
   /* Calculate max # of rows allowed in one allocation chunk */
   ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
@@ -968,9 +959,9 @@ free_pool (j_common_ptr cinfo, int pool_id)
   mem->large_list[pool_id] = NULL;
 
   while (lhdr_ptr != NULL) {
-    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
-    space_freed = lhdr_ptr->hdr.bytes_used +
-                 lhdr_ptr->hdr.bytes_left +
+    large_pool_ptr next_lhdr_ptr = lhdr_ptr->next;
+    space_freed = lhdr_ptr->bytes_used +
+                 lhdr_ptr->bytes_left +
                  SIZEOF(large_pool_hdr);
     jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
@@ -982,9 +973,9 @@ free_pool (j_common_ptr cinfo, int pool_id)
   mem->small_list[pool_id] = NULL;
 
   while (shdr_ptr != NULL) {
-    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
-    space_freed = shdr_ptr->hdr.bytes_used +
-                 shdr_ptr->hdr.bytes_left +
+    small_pool_ptr next_shdr_ptr = shdr_ptr->next;
+    space_freed = shdr_ptr->bytes_used +
+                 shdr_ptr->bytes_left +
                  SIZEOF(small_pool_hdr);
     jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
@@ -1035,22 +1026,22 @@ jinit_memory_mgr (j_common_ptr cinfo)
   cinfo->mem = NULL;           /* for safety if init fails */
 
   /* Check for configuration errors.
-   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * ALIGN_SIZE should be a power of 2; otherwise, it probably
    * doesn't reflect any real hardware alignment requirement.
    * The test is a little tricky: for X>0, X and X-1 have no one-bits
    * in common if and only if X is a power of 2, ie has only one one-bit.
    * Some compilers may give an "unreachable code" warning here; ignore it.
    */
-  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+  if ((ALIGN_SIZE & (ALIGN_SIZE-1)) != 0)
     ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
   /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
-   * a multiple of SIZEOF(ALIGN_TYPE).
+   * a multiple of ALIGN_SIZE.
    * Again, an "unreachable code" warning may be ignored here.
    * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
    */
   test_mac = (size_t) MAX_ALLOC_CHUNK;
   if ((long) test_mac != MAX_ALLOC_CHUNK ||
-      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+      (MAX_ALLOC_CHUNK % ALIGN_SIZE) != 0)
     ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
 
   max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
index 54a7d1c447e4ed862caf5a905b0275efcf3629ef..b425519f138ad8eef9adbba5f1b4fe6776811f42 100644 (file)
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : March 28, 2005
+ * ---------------------------------------------------------------------
+ *
  * This file contains additional configuration options that customize the
  * JPEG software for special applications or support machine-dependent
  * optimizations.  Most users will not need to touch this file.
@@ -20,7 +27,9 @@
  * We do not support run-time selection of data precision, sorry.
  */
 
-#define BITS_IN_JSAMPLE  8     /* use 8 or 12 */
+/* SIMD Ext: This SIMD code only copes with 8-bit sample values. */
+
+#define BITS_IN_JSAMPLE  8     /* SIMD Ext: cannot be changed! */
 
 
 /*
@@ -157,7 +166,8 @@ typedef short INT16;
 
 /* INT32 must hold at least signed 32-bit values. */
 
-#ifndef XMD_H                  /* X11/xmd.h correctly defines INT32 */
+       /* X11/xmd.h and basetsd.h (Win32 SDK) correctly define INT32 */
+#if !defined(XMD_H) && !defined(_BASETSD_H_) && !defined(_BASETSD_H)
 typedef long INT32;
 #endif
 
@@ -180,14 +190,24 @@ typedef unsigned int JDIMENSION;
  * or code profilers that require it.
  */
 
+#if defined(_MSC_VER) || defined(__BORLANDC__) || \
+    defined(__WATCOMC__) || defined(__MWERKS__) || \
+    defined(__ICC) || defined(__INTEL_COMPILER)
+#define JCDECL  __cdecl                /* compilers with the __cdecl keyword */
+#elif defined(__GNUC__) && defined(__i386__)   /* attribute is x86-32 only */
+#define JCDECL  __attribute__((__cdecl__))
+#else
+#define JCDECL                 /* default calling convention elsewhere */
+#endif
+
 /* a function called through method pointers: */
-#define METHODDEF(type)                static type
+#define METHODDEF(type)                static type JCDECL
 /* a function used only in its module: */
 #define LOCAL(type)            static type
 /* a function referenced thru EXTERNs: */
-#define GLOBAL(type)           type
+#define GLOBAL(type)           type JCDECL
 /* a reference to a GLOBAL function: */
-#define EXTERN(type)           extern type
+#define EXTERN(type)           extern type JCDECL
 
 
 /* This macro is used to declare a "method", that is, a function pointer.
@@ -197,9 +217,9 @@ typedef unsigned int JDIMENSION;
  */
 
 #ifdef HAVE_PROTOTYPES
-#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+#define JMETHOD(type,methodname,arglist)  type (JCDECL *methodname) arglist
 #else
-#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+#define JMETHOD(type,methodname,arglist)  type (JCDECL *methodname) ()
 #endif
 
 
@@ -209,11 +229,13 @@ typedef unsigned int JDIMENSION;
  * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
  */
 
+#ifndef FAR
 #ifdef NEED_FAR_POINTERS
 #define FAR  far
 #else
 #define FAR
 #endif
+#endif /* !FAR */
 
 
 /*
@@ -224,8 +246,14 @@ typedef unsigned int JDIMENSION;
  */
 
 #ifndef HAVE_BOOLEAN
-typedef int boolean;
+#ifdef TYPEDEF_UCHAR_BOOLEAN
+#ifndef __RPCNDR_H__           /* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
 #endif
+#else /* !TYPEDEF_UCHAR_BOOLEAN */
+typedef int boolean;
+#endif /* TYPEDEF_UCHAR_BOOLEAN */
+#endif /* !HAVE_BOOLEAN */
 #ifndef FALSE                  /* in case these macros already exist */
 #define FALSE  0               /* values of boolean */
 #endif
@@ -290,6 +318,7 @@ typedef int boolean;
 #define IDCT_SCALING_SUPPORTED     /* Output rescaling via IDCT? */
 #undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
 #define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define UPSAMPLE_H1V2_SUPPORTED            /* Fast/fancy processing for 1h2v? */
 #define QUANT_1PASS_SUPPORTED      /* 1-pass color quantization? */
 #define QUANT_2PASS_SUPPORTED      /* 2-pass color quantization? */
 
@@ -316,6 +345,84 @@ typedef int boolean;
 #define RGB_BLUE       2       /* Offset of Blue */
 #define RGB_PIXELSIZE  3       /* JSAMPLEs per RGB scanline element */
 
+#undef RGBX_FILLER_0XFF        /* fill dummy bytes with 0xFF in RGBX format */
+
+
+/* SIMD support options: */
+
+#ifndef JSIMD_MMX_NOT_SUPPORTED
+#define JSIMD_ENCODER_MMX_SUPPORTED    /* Use MMX    in encoding process */
+#define JSIMD_DECODER_MMX_SUPPORTED    /* Use MMX    in decoding process */
+#endif
+#ifndef JSIMD_3DNOW_NOT_SUPPORTED
+#define JSIMD_ENCODER_3DNOW_SUPPORTED  /* Use 3DNow! in encoding process */
+#define JSIMD_DECODER_3DNOW_SUPPORTED  /* Use 3DNow! in decoding process */
+#endif
+#ifndef JSIMD_SSE_NOT_SUPPORTED
+#define JSIMD_ENCODER_SSE_SUPPORTED    /* Use SSE    in encoding process */
+#define JSIMD_DECODER_SSE_SUPPORTED    /* Use SSE    in decoding process */
+#endif
+#ifndef JSIMD_SSE2_NOT_SUPPORTED
+#define JSIMD_ENCODER_SSE2_SUPPORTED   /* Use SSE2   in encoding process */
+#define JSIMD_DECODER_SSE2_SUPPORTED   /* Use SSE2   in decoding process */
+#endif
+
+/* (encoder part): */
+
+#undef JFDCT_INT_QUANTIZE_WITH_DIVISION /* Use general quantization method */
+
+#if defined(JSIMD_ENCODER_MMX_SUPPORTED)
+#define JCCOLOR_RGBYCC_MMX_SUPPORTED   /* RGB->YCC conversion with MMX */
+#define JCSAMPLE_MMX_SUPPORTED         /* downsampling with MMX */
+#define JFDCT_INT_MMX_SUPPORTED                /* forward DCT with MMX */
+#endif
+#if defined(JSIMD_ENCODER_SSE2_SUPPORTED)
+#define JCCOLOR_RGBYCC_SSE2_SUPPORTED  /* RGB->YCC conversion with SSE2 */
+#define JCSAMPLE_SSE2_SUPPORTED                /* downsampling with SSE2 */
+#define JFDCT_INT_SSE2_SUPPORTED       /* forward DCT with SSE2 */
+#endif
+#if defined(JSIMD_ENCODER_3DNOW_SUPPORTED) && \
+    defined(JSIMD_ENCODER_MMX_SUPPORTED)
+#define JFDCT_FLT_3DNOW_MMX_SUPPORTED  /* forward DCT with 3DNow!/MMX */
+#endif
+#if defined(JSIMD_ENCODER_SSE_SUPPORTED) && \
+    defined(JSIMD_ENCODER_MMX_SUPPORTED)
+#define JFDCT_FLT_SSE_MMX_SUPPORTED    /* forward DCT with SSE/MMX */
+#endif
+#if defined(JSIMD_ENCODER_SSE_SUPPORTED) && \
+    defined(JSIMD_ENCODER_SSE2_SUPPORTED)
+#define JFDCT_FLT_SSE_SSE2_SUPPORTED   /* forward DCT with SSE/SSE2 */
+#endif
+
+/* (decoder part): */
+
+#if defined(JSIMD_DECODER_MMX_SUPPORTED)
+#define JDCOLOR_YCCRGB_MMX_SUPPORTED   /* YCC->RGB conversion with MMX */
+#define JDMERGE_MMX_SUPPORTED          /* merged upsampling with MMX */
+#define JDSAMPLE_FANCY_MMX_SUPPORTED   /* fancy upsampling with MMX */
+#define JDSAMPLE_SIMPLE_MMX_SUPPORTED  /* sloppy upsampling with MMX */
+#define JIDCT_INT_MMX_SUPPORTED                /* inverse DCT with MMX */
+#endif
+#if defined(JSIMD_DECODER_SSE2_SUPPORTED)
+#define JDCOLOR_YCCRGB_SSE2_SUPPORTED  /* YCC->RGB conversion with SSE2 */
+#define JDMERGE_SSE2_SUPPORTED         /* merged upsampling with SSE2 */
+#define JDSAMPLE_FANCY_SSE2_SUPPORTED  /* fancy upsampling with SSE2 */
+#define JDSAMPLE_SIMPLE_SSE2_SUPPORTED /* sloppy upsampling with SSE2 */
+#define JIDCT_INT_SSE2_SUPPORTED       /* inverse DCT with SSE2 */
+#endif
+#if defined(JSIMD_DECODER_3DNOW_SUPPORTED) && \
+    defined(JSIMD_DECODER_MMX_SUPPORTED)
+#define JIDCT_FLT_3DNOW_MMX_SUPPORTED  /* inverse DCT with 3DNow!/MMX */
+#endif
+#if defined(JSIMD_DECODER_SSE_SUPPORTED) && \
+    defined(JSIMD_DECODER_MMX_SUPPORTED)
+#define JIDCT_FLT_SSE_MMX_SUPPORTED    /* inverse DCT with SSE/MMX */
+#endif
+#if defined(JSIMD_DECODER_SSE_SUPPORTED) && \
+    defined(JSIMD_DECODER_SSE2_SUPPORTED)
+#define JIDCT_FLT_SSE_SSE2_SUPPORTED   /* inverse DCT with SSE/SSE2 */
+#endif
+
 
 /* Definitions for speed-related optimizations. */
 
@@ -328,6 +435,9 @@ typedef int boolean;
 #ifdef __GNUC__                        /* for instance, GNU C knows about inline */
 #define INLINE __inline__
 #endif
+#ifdef _MSC_VER
+#define INLINE __inline
+#endif
 #ifndef INLINE
 #define INLINE                 /* default is to define it as empty */
 #endif
diff --git a/jpegdll.def b/jpegdll.def
new file mode 100644 (file)
index 0000000..5a86cd6
--- /dev/null
@@ -0,0 +1,73 @@
+;
+; jpegdll.def - module definition file for Win32 DLL
+;
+
+; sed -e "/\(jinit\|jpeg_simd_\(cpu\|os\|merged\)\)/d" -e "s/^EXTERN(..*) \([_A-Za-z][_A-Za-z0-9]*\).*/  \1/p" -e d jpeglib.h jpegint.h
+
+EXPORTS
+  ; API functions in jpeglib.h, which are intended
+  ; to be called by the user applications.
+  jpeg_std_error
+  jpeg_CreateCompress
+  jpeg_CreateDecompress
+  jpeg_destroy_compress
+  jpeg_destroy_decompress
+  jpeg_stdio_dest
+  jpeg_stdio_src
+  jpeg_set_defaults
+  jpeg_set_colorspace
+  jpeg_default_colorspace
+  jpeg_set_quality
+  jpeg_set_linear_quality
+  jpeg_add_quant_table
+  jpeg_quality_scaling
+  jpeg_simple_progression
+  jpeg_suppress_tables
+  jpeg_alloc_quant_table
+  jpeg_alloc_huff_table
+  jpeg_start_compress
+  jpeg_write_scanlines
+  jpeg_finish_compress
+  jpeg_write_raw_data
+  jpeg_write_marker
+  jpeg_write_m_header
+  jpeg_write_m_byte
+  jpeg_write_tables
+  jpeg_read_header
+  jpeg_start_decompress
+  jpeg_read_scanlines
+  jpeg_finish_decompress
+  jpeg_read_raw_data
+  jpeg_has_multiple_scans
+  jpeg_start_output
+  jpeg_finish_output
+  jpeg_input_complete
+  jpeg_new_colormap
+  jpeg_consume_input
+  jpeg_calc_output_dimensions
+  jpeg_save_markers
+  jpeg_set_marker_processor
+  jpeg_read_coefficients
+  jpeg_write_coefficients
+  jpeg_copy_critical_parameters
+  jpeg_abort_compress
+  jpeg_abort_decompress
+  jpeg_abort
+  jpeg_destroy
+  jpeg_resync_to_restart
+  ; Functions that are introduced by SIMD extension.
+  jpeg_simd_support
+  jpeg_simd_mask
+  jpeg_simd_color_converter
+  jpeg_simd_downsampler
+  jpeg_simd_forward_dct
+  jpeg_simd_color_deconverter
+  jpeg_simd_upsampler
+  jpeg_simd_inverse_dct
+  ; Utility functions in jutils.c.
+  ; These are needed by some applications.
+  jdiv_round_up
+  jround_up
+  jcopy_sample_rows
+  jcopy_block_row
+  jzero_far
diff --git a/jpegdll.rc b/jpegdll.rc
new file mode 100644 (file)
index 0000000..fb3d327
--- /dev/null
@@ -0,0 +1,57 @@
+//
+// jpegdll.rc - version information for Win32 DLL
+//
+
+// from <winver.h>
+#define VS_VERSION_INFO         1
+#define VS_FFI_FILEFLAGSMASK    0x0000003FL
+#define VS_FF_DEBUG             0x00000001L
+#define VOS__WINDOWS32          0x00000004L
+#define VFT_DLL                 0x00000002L
+#define VFT2_UNKNOWN            0x00000000L
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION    6,2,1,2
+ PRODUCTVERSION 6,2,1,2
+ FILEFLAGSMASK  VS_FFI_FILEFLAGSMASK
+#ifdef _DEBUG
+ FILEFLAGS      VS_FF_DEBUG
+#else
+ FILEFLAGS      0x00000000L
+#endif
+ FILEOS         VOS__WINDOWS32
+ FILETYPE       VFT_DLL
+ FILESUBTYPE    VFT2_UNKNOWN
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "00000000"
+        BEGIN
+            VALUE "LegalCopyright",  "Copyright (C) 1991-1998 Thomas G. Lane\0"
+            VALUE "FileDescription", "Independent JPEG Group's JPEG Library"
+                                     " with SIMD support\0"
+            VALUE "ProductName", "The Independent JPEG Group's JPEG software"
+                                 " release 6b   with x86 SIMD extension for"
+                                 " IJG JPEG library version 1.02\0"
+            VALUE "Comments", "This is not an official binary from IJG.   "
+                              "The SIMD code in this DLL is copyright (C)"
+                              " 1999-2006 MIYASAKA Masaru.\0"
+            VALUE "FileVersion",      "6.2.1.02\0"
+            VALUE "ProductVersion",   "6.2.1.02\0"
+            VALUE "OriginalFilename", "jpeg62.dll\0"
+            VALUE "InternalName",     "jpeg62\0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x0, 0
+    END
+END
+
+/////////////////////////////////////////////////////////////////////////////
index 95b00d405caeca1dc971b37a94bbadc566f3074b..511e07c549829330e8f19474c1ad590f306c5f35 100644 (file)
--- a/jpegint.h
+++ b/jpegint.h
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : February 4, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file provides common declarations for the various JPEG modules.
  * These declarations are considered internal to the JPEG library; most
  * applications using the library shouldn't need to include this file.
@@ -291,6 +298,19 @@ struct jpeg_color_quantizer {
 #endif
 
 
+/* SIMD Ext: This macro checks whether a constant table used by SSE/SSE2
+ * code is aligned to a 16-byte boundary.  Most SSE/SSE2 instructions
+ * fault (#GP) on a misaligned memory operand.  The pointer is cast to
+ * size_t, not unsigned, so that no address bits are dropped by the cast.
+ */
+
+#ifdef JSIMD_NO_SSECONST_ALIGNMENT_CHECK
+#define IS_CONST_ALIGNED_16(p) (1)
+#else
+#define IS_CONST_ALIGNED_16(p) (((size_t)(p) & 0x0F) == 0)
+#endif
+
+
 /* Short forms of external names for systems with brain-damaged linkers. */
 
 #ifdef NEED_SHORT_EXTERNAL_NAMES
@@ -327,6 +347,8 @@ struct jpeg_color_quantizer {
 #define jzero_far              jZeroFar
 #define jpeg_zigzag_order      jZIGTable
 #define jpeg_natural_order     jZAGTable
+#define jpeg_simd_cpu_support  jSiCpuSupport
+#define jpeg_simd_os_support   jSiOsSupport
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 
@@ -382,6 +404,10 @@ extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
 #endif
 extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
 
+/* SIMD Ext: retrieve SIMD/CPU information */
+EXTERN(unsigned int) jpeg_simd_cpu_support JPP((void));
+EXTERN(unsigned int) jpeg_simd_os_support JPP((unsigned int simd));
+
 /* Suppress undefined-structure complaints if necessary. */
 
 #ifdef INCOMPLETE_TYPES_BROKEN
index d1be8ddeff1bfee59f56d3ea04379f4b6f4d1c0e..0506316720dc25f98361b70e612d626ab1b46131 100644 (file)
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : February 4, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file defines the application interface for the JPEG library.
  * Most applications using the library need only include this file,
  * and perhaps jerror.h if they want to know the exact error codes.
 #ifndef JPEGLIB_H
 #define JPEGLIB_H
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * First we include the configuration files that record how this
  * installation of the JPEG library is set up.  jconfig.h can be
 #define JPEG_LIB_VERSION  62   /* Version 6b */
 
 
+/* SIMD Ext: Version ID for the SIMD extension.
+ */
+
+#define JPEG_SIMDEXT_VERSION  102      /* version 1.02 */
+#define JPEG_SIMDEXT_VER_STR  "1.02"
+
+
 /* Various constants determining the sizes of things.
  * All of these are specified by the JPEG standard, so don't change them
  * if you want to be compatible.
@@ -235,6 +253,15 @@ typedef enum {
        JDITHER_FS              /* Floyd-Steinberg error diffusion dither */
 } J_DITHER_MODE;
 
+/* SIMD Ext: bitflags for jpeg_simd_support() and jpeg_simd_mask() */
+
+#define JSIMD_NONE    0x00
+#define JSIMD_MMX     0x01
+#define JSIMD_3DNOW   0x02
+#define JSIMD_SSE     0x04
+#define JSIMD_SSE2    0x08
+#define JSIMD_ALL     (JSIMD_MMX | JSIMD_3DNOW | JSIMD_SSE | JSIMD_SSE2)
+
 
 /* Common fields between JPEG compression and decompression master structs. */
 
@@ -877,6 +904,18 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
 #define jpeg_abort             jAbort
 #define jpeg_destroy           jDestroy
 #define jpeg_resync_to_restart jResyncRestart
+#define jpeg_simd_support      jSiSupport
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+#define jpeg_simd_mask         jSiMask
+#endif
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+#define jpeg_simd_color_converter      jSiCColor
+#define jpeg_simd_downsampler          jSiDownsampler
+#define jpeg_simd_forward_dct          jSiFDCT
+#define jpeg_simd_color_deconverter    jSiDColor
+#define jpeg_simd_upsampler            jSiUpsampler
+#define jpeg_simd_inverse_dct          jSiIDCT
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 
@@ -1037,6 +1076,24 @@ EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
 EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
                                            int desired));
 
+/* SIMD Ext: retrieve SIMD/CPU information */
+EXTERN(unsigned int) jpeg_simd_support JPP((j_common_ptr cinfo));
+#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED
+EXTERN(unsigned int) jpeg_simd_mask
+       JPP((j_common_ptr cinfo, unsigned int remove, unsigned int add));
+#endif
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+EXTERN(unsigned int) jpeg_simd_color_converter JPP((j_compress_ptr cinfo));
+EXTERN(unsigned int) jpeg_simd_downsampler JPP((j_compress_ptr cinfo));
+EXTERN(unsigned int) jpeg_simd_forward_dct JPP((j_compress_ptr cinfo,
+                                               int method));
+EXTERN(unsigned int) jpeg_simd_color_deconverter JPP((j_decompress_ptr cinfo));
+EXTERN(unsigned int) jpeg_simd_upsampler JPP((j_decompress_ptr cinfo,
+                                             int do_fancy));
+EXTERN(unsigned int) jpeg_simd_inverse_dct JPP((j_decompress_ptr cinfo,
+                                               int method));
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
+
 
 /* These marker codes are exported since applications and data source modules
  * are likely to want to use them.
@@ -1093,4 +1150,8 @@ struct jpeg_color_quantizer { long dummy; };
 #include "jerror.h"            /* fetch error codes too */
 #endif
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* JPEGLIB_H */
diff --git a/jsimdcpu.asm b/jsimdcpu.asm
new file mode 100644 (file)
index 0000000..1c851d1
--- /dev/null
@@ -0,0 +1,112 @@
+;
+; jsimdcpu.asm - SIMD instruction support check
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : August 23, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+       SECTION SEG_CONST
+
+       alignz  16
+
+copyright:
+       db      " x86 SIMD ext for IJG lib V", JPEG_SIMDEXT_VER_STR
+       db      " Copyright 2006, MIYASAKA Masaru "
+
+       alignz  16
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Check if the CPU supports SIMD instructions
+;
+; GLOBAL(unsigned int)
+; jpeg_simd_cpu_support (void)
+;
+
+       align   16
+       global  EXTN(jpeg_simd_cpu_support)
+
+EXTN(jpeg_simd_cpu_support):
+       push    ebx                     ; preserved for the caller (restored before ret)
+;      push    ecx             ; need not be preserved
+;      push    edx             ; need not be preserved
+;      push    esi             ; unused
+       push    edi                     ; preserved; edi accumulates the result
+
+       xor     edi,edi                 ; simd support flag
+
+       pushfd
+       pop     eax                     ; eax = current EFLAGS
+       mov     edx,eax                 ; edx = original EFLAGS, kept for comparison
+       xor     eax, 1<<21              ; flip ID bit in EFLAGS
+       push    eax
+       popfd                           ; try to write the modified EFLAGS back
+       pushfd
+       pop     eax                     ; re-read EFLAGS
+       xor     eax,edx                 ; zero if the ID bit could not be changed
+       jz      short .return           ; CPUID is not supported
+
+       ; Check for MMX, SSE and SSE2 instruction support
+       xor     eax,eax                 ; CPUID function 0
+       cpuid
+       test    eax,eax                 ; eax = highest standard function number
+       jz      short .return           ; function 1 is not available
+
+       xor     eax,eax
+       inc     eax                     ; CPUID function 1
+       cpuid
+       mov     eax,edx                 ; eax = Standard feature flags
+
+       test    eax, 1<<23              ; bit23:MMX
+       jz      short .no_mmx
+       or      edi, byte JSIMD_MMX
+.no_mmx:
+       test    eax, 1<<25              ; bit25:SSE
+       jz      short .no_sse
+       or      edi, byte JSIMD_SSE
+.no_sse:
+       test    eax, 1<<26              ; bit26:SSE2
+       jz      short .no_sse2
+       or      edi, byte JSIMD_SSE2
+.no_sse2:
+
+       ; Check for 3DNow! instruction support
+       mov     eax, 0x80000000         ; query highest extended function number
+       cpuid
+       cmp     eax, 0x80000000
+       jbe     short .return           ; function 0x80000001 is not available
+
+       mov     eax, 0x80000001
+       cpuid
+       mov     eax,edx                 ; eax = Extended feature flags
+
+       test    eax, 1<<31              ; bit31:3DNow!(vendor independent)
+       jz      short .no_3dnow
+       or      edi, byte JSIMD_3DNOW
+.no_3dnow:
+
+.return:
+       mov     eax,edi                 ; return the accumulated JSIMD_* flags
+
+       pop     edi
+;      pop     esi             ; unused
+;      pop     edx             ; need not be preserved
+;      pop     ecx             ; need not be preserved
+       pop     ebx
+       ret
+
diff --git a/jsimddjg.asm b/jsimddjg.asm
new file mode 100644 (file)
index 0000000..02c82e4
--- /dev/null
@@ -0,0 +1,130 @@
+;
+; jsimddjg.asm - SIMD instruction support check (for DJGPP V.2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : September 26, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Check if the OS supports SIMD instructions (DJGPP V.2)
+;
+; GLOBAL(unsigned int)
+; jpeg_simd_os_support (unsigned int simd)
+;
+
+%define EXCEPTION_ILLEGAL_INSTRUCTION  6       ; vector number of #UD
+
+%define simd   ebp+8                   ; unsigned int simd
+%define mxcsr  ebp-4                   ; unsigned int mxcsr = 0x1F80
+
+       align   16
+       global  EXTN(jpeg_simd_os_support)
+
+EXTN(jpeg_simd_os_support):
+       push    ebp
+       mov     ebp,esp                 ; frame pointer: [simd] and [mxcsr] are ebp-relative
+       push    dword 0x1F80            ; default value of MXCSR register
+       push    ebx                     ; preserved for the caller
+
+       push    DWORD [simd]    ; simd_flags - modified from exception_handler
+
+       mov     bl, EXCEPTION_ILLEGAL_INSTRUCTION       ; exception number (#UD)
+       mov     ax, 0x0202      ; Get Processor Exception Handler Vector
+       int     0x31            ; DPMI function call
+       push    ecx             ; selector of old exception handler
+       push    edx             ; offset   of old exception handler
+
+       mov     ecx,cs                  ; selector of new handler (this code segment)
+       mov     edx, exception_handler  ; offset   of new handler
+       mov     bl, EXCEPTION_ILLEGAL_INSTRUCTION       ; exception number (#UD)
+       mov     ax, 0x0203      ; Set Processor Exception Handler Vector
+       int     0x31            ; DPMI function call
+
+       mov     eax, DWORD [simd]       ; eax = SIMD flags requested by the caller
+
+       ; If floating point emulation is enabled (CR0.EM = 1),
+       ; executing an MMX/3DNow! instruction generates invalid
+       ; opcode exception (#UD).
+
+       push    byte (.mmx_1 - .mmx_0)          ; inst_bytes
+       push    byte (JSIMD_MMX | JSIMD_3DNOW)  ; test_flags
+       test    eax, DWORD [esp]                ; any of test_flags requested?
+       jz      short .mmx_1                    ; no: skip the probe
+.mmx_0:        emms                            ; executing MMX instruction
+.mmx_1:        add     esp, byte 8             ; drop test_flags and inst_bytes
+
+       push    byte (.sse_1 - .sse_0)          ; inst_bytes
+       push    byte (JSIMD_SSE | JSIMD_SSE2)   ; test_flags
+       test    eax, DWORD [esp]                ; any of test_flags requested?
+       jz      short .sse_1                    ; no: skip the probe
+.sse_0:        ldmxcsr DWORD [mxcsr]           ; executing SSE instruction
+.sse_1:        add     esp, byte 8             ; drop test_flags and inst_bytes
+
+       pop     edx             ; offset   of old exception handler
+       pop     ecx             ; selector of old exception handler
+       mov     bl, EXCEPTION_ILLEGAL_INSTRUCTION       ; exception number (#UD)
+       mov     ax, 0x0203      ; Set Processor Exception Handler Vector
+       int     0x31            ; DPMI function call
+
+       pop     eax             ; return simd_flags
+       and     eax, byte JSIMD_ALL     ; keep only the JSIMD_* bits
+
+       pop     ebx
+       mov     esp,ebp                 ; also discards the mxcsr slot
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; LOCAL(void) far
+; exception_handler (unsigned long error_code,
+;                    void * context_eip, unsigned short context_cs,
+;                    unsigned long context_eflags,
+;                    void * context_esp, unsigned short context_ss);
+;
+
+%define error_code     esp+12+8        ; unsigned long error_code
+%define context_eip    esp+12+12       ; void * context_eip
+%define context_cs     esp+12+16       ; unsigned short context_cs
+%define context_eflags esp+12+20       ; unsigned long context_eflags
+%define context_esp    esp+12+24       ; void * context_esp
+%define context_ss     esp+12+28       ; unsigned short context_ss
+
+%define test_flags(b)  (b)+0
+%define inst_bytes(b)  (b)+4
+%define simd_flags(b)  (b)+16
+
+       align   16
+
+exception_handler:
+       push    eax                     ; these three pushes are the "+12" in
+       push    ecx                     ; the context_* offsets defined above
+       push    edx
+
+       mov     eax, POINTER [context_esp]      ; eax = stack pointer of the faulting code
+       mov     ecx, DWORD [test_flags(eax)]    ; flags that were being probed
+       mov     edx, DWORD [inst_bytes(eax)]    ; byte length of the probed instruction
+       not     ecx                             ; mask that clears the probed flags
+       add     POINTER [context_eip], edx      ; next instruction
+       and     DWORD [simd_flags(eax)], ecx    ; turn off flag
+
+       pop     edx
+       pop     ecx
+       pop     eax
+       retf                            ; far return (handler is declared far)
+
diff --git a/jsimdext.inc b/jsimdext.inc
new file mode 100644 (file)
index 0000000..a502c07
--- /dev/null
@@ -0,0 +1,347 @@
+;
+; jsimdext.inc - common declarations
+;
+; x86 SIMD extension for IJG JPEG library - version 1.02
+;
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+;
+; This software is provided 'as-is', without any express or implied
+; warranty.  In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+;    claim that you wrote the original software. If you use this software
+;    in a product, an acknowledgment in the product documentation would be
+;    appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+;    misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%ifndef JSIMDCFG_INCLUDED      ; in case jsimdcfg.inc already did
+%include "jsimdcfg.inc"                ; configuration declarations
+%endif
+
+; ==========================================================================
+;  System-dependent configurations
+
+%ifdef WIN32   ; ----(nasm -fwin32 -DWIN32 ...)--------
+; * Microsoft Visual C++
+; * MinGW (Minimalist GNU for Windows)
+; * CygWin
+; * LCC-Win32
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text  align=16 public use32 class=CODE
+%define SEG_CONST   .rdata align=16 public use32 class=CONST
+
+%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)----------
+; * Borland C++ (Win32)
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text  align=16 public use32 class=CODE
+%define SEG_CONST   .data  align=16 public use32 class=DATA
+
+%elifdef ELF   ; ----(nasm -felf -DELF ...)------------
+; * Linux
+; * *BSD family Unix using elf format
+; * Unix System V, including Solaris x86, UnixWare and SCO Unix
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
+%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
+
+; To make the code position-independent, append -DPIC to the commandline
+;
+%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_      ; ELF supports PIC
+%define EXTN(name)  name                       ; foo() -> foo
+
+%elifdef AOUT  ; ----(nasm -faoutb/aout -DAOUT ...)----
+; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
+; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text
+%define SEG_CONST   .data
+
+; To make the code position-independent, append -DPIC to the commandline
+;
+%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_     ; BSD-style a.out supports PIC
+
+%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
+; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text  ;align=16   ; nasm doesn't accept align=16. why?
+%define SEG_CONST   .rodata align=16
+
+; The generation of position-independent code (PIC) is the default on Darwin.
+;
+%define PIC
+%define GOT_SYMBOL  _MACHO_PIC_                ; Mach-O style code-relative addressing
+
+%else          ; ----(Other case)----------------------
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text
+%define SEG_CONST   .data
+
+%endif ; ----------------------------------------------
+
+; ==========================================================================
+
+; ---- jpeglib.h -----------------------------------------------------------
+
+%define DCTSIZE                8       ; The basic DCT block is 8x8 samples
+%define DCTSIZE2       64      ; DCTSIZE squared; # of elements in a block
+
+%define JSIMD_NONE     0x00    ; bitflags for jpeg_simd_*_support()
+%define JSIMD_MMX      0x01
+%define JSIMD_3DNOW    0x02
+%define JSIMD_SSE      0x04
+%define JSIMD_SSE2     0x08
+%define JSIMD_ALL      (JSIMD_MMX | JSIMD_3DNOW | JSIMD_SSE | JSIMD_SSE2)
+
+; ---- jpegint.h -----------------------------------------------------------
+
+; Short forms of external names for systems with brain-damaged linkers.
+;
+%ifdef NEED_SHORT_EXTERNAL_NAMES
+%define jpeg_simd_cpu_support  jSiCpuSupport
+%define jpeg_simd_os_support   jSiOsSupport
+%endif ; NEED_SHORT_EXTERNAL_NAMES
+
+; ---- jmorecfg.h ----------------------------------------------------------
+;
+; BITS_IN_JSAMPLE==8 (8-bit sample values) is the only valid setting
+; on this SIMD implementation.
+;
+%define BITS_IN_JSAMPLE        8       ; Caution: Cannot be changed
+
+; Representation of a single sample (pixel element value).
+; On this SIMD implementation, this must be 'unsigned char'.
+;
+%define JSAMPLE                byte            ; unsigned char
+%define SIZEOF_JSAMPLE SIZEOF_BYTE     ; sizeof(JSAMPLE)
+%define MAXJSAMPLE     255
+%define CENTERJSAMPLE  128
+
+; Representation of a DCT frequency coefficient.
+; On this SIMD implementation, this must be 'short'.
+;
+%define JCOEF          word            ; short
+%define SIZEOF_JCOEF   SIZEOF_WORD     ; sizeof(JCOEF)
+
+; INT32 must hold at least signed 32-bit values.
+; On this SIMD implementation, this must be 'long'.
+;
+%define INT32          dword           ; long
+%define SIZEOF_INT32   SIZEOF_DWORD    ; sizeof(INT32)
+
+; Datatype used for image dimensions.
+; On this SIMD implementation, this must be 'unsigned int'.
+;
+%define JDIMENSION             dword           ; unsigned int
+%define SIZEOF_JDIMENSION      SIZEOF_DWORD    ; sizeof(JDIMENSION)
+
+; --------------------------------------------------------------------------
+
+%define JSAMPROW               POINTER         ; JSAMPLE FAR * (jpeglib.h)
+%define JSAMPARRAY             POINTER         ; JSAMPROW *    (jpeglib.h)
+%define JSAMPIMAGE             POINTER         ; JSAMPARRAY *  (jpeglib.h)
+%define JCOEFPTR               POINTER         ; JCOEF FAR *   (jpeglib.h)
+%define SIZEOF_JSAMPROW                SIZEOF_POINTER  ; sizeof(JSAMPROW)
+%define SIZEOF_JSAMPARRAY      SIZEOF_POINTER  ; sizeof(JSAMPARRAY)
+%define SIZEOF_JSAMPIMAGE      SIZEOF_POINTER  ; sizeof(JSAMPIMAGE)
+%define SIZEOF_JCOEFPTR                SIZEOF_POINTER  ; sizeof(JCOEFPTR)
+
+%define POINTER                        dword           ; general pointer type
+%define SIZEOF_POINTER         SIZEOF_DWORD    ; sizeof(POINTER)
+%define POINTER_BIT            DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
+
+%define INT                    dword           ; signed integer type
+%define SIZEOF_INT             SIZEOF_DWORD    ; sizeof(INT)
+%define INT_BIT                        DWORD_BIT       ; sizeof(INT)*BYTE_BIT
+
+%define FP32                   dword           ; IEEE754 single
+%define SIZEOF_FP32            SIZEOF_DWORD    ; sizeof(FP32)
+%define FP32_BIT               DWORD_BIT       ; sizeof(FP32)*BYTE_BIT
+
+%define FP64                   qword           ; IEEE754 double
+%define SIZEOF_FP64            SIZEOF_QWORD    ; sizeof(FP64)
+%define FP64_BIT               QWORD_BIT       ; sizeof(FP64)*BYTE_BIT
+
+%define FP80                   tword           ; IEEE754 double-extended(x86)
+%define SIZEOF_FP80            SIZEOF_TWORD    ; sizeof(FP80)
+%define FP80_BIT               TWORD_BIT       ; sizeof(FP80)*BYTE_BIT
+
+%define MMWORD                 qword           ; int64  (MMX register)
+%define SIZEOF_MMWORD          SIZEOF_QWORD    ; sizeof(MMWORD)
+%define MMWORD_BIT             QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT
+
+%define XMMWORD                        dqword          ; int128 (SSE register)
+%define SIZEOF_XMMWORD         SIZEOF_DQWORD   ; sizeof(XMMWORD)
+%define XMMWORD_BIT            DQWORD_BIT      ; sizeof(XMMWORD)*BYTE_BIT
+
+%define SIZEOF_BYTE            1               ; sizeof(BYTE)
+%define SIZEOF_WORD            2               ; sizeof(WORD)
+%define SIZEOF_DWORD           4               ; sizeof(DWORD)
+%define SIZEOF_QWORD           8               ; sizeof(QWORD)
+%define SIZEOF_TBYTE           10              ; sizeof(TBYTE)
+%define SIZEOF_TWORD           10              ; sizeof(TWORD)
+%define SIZEOF_DQWORD          16              ; sizeof(DQWORD)
+
+%define BYTE_BIT               8               ; CHAR_BIT in C
+%define WORD_BIT               16              ; sizeof(WORD)*BYTE_BIT
+%define DWORD_BIT              32              ; sizeof(DWORD)*BYTE_BIT
+%define QWORD_BIT              64              ; sizeof(QWORD)*BYTE_BIT
+%define TBYTE_BIT              80              ; sizeof(TBYTE)*BYTE_BIT
+%define TWORD_BIT              80              ; sizeof(TWORD)*BYTE_BIT
+%define DQWORD_BIT             128             ; sizeof(DQWORD)*BYTE_BIT
+
+%idefine TBYTE TWORD   ; NASM uses the keyword 'TWORD' instead of 'TBYTE'
+%idefine DQWORD                ; currently not supported by NASM
+%idefine _MMWORD       ;
+%idefine _DWORD                ;
+
+; --------------------------------------------------------------------------
+;  External Symbol Name
+;
+%ifndef EXTN
+%define EXTN(name)   _ %+ name         ; foo() -> _foo
+%endif
+
+; --------------------------------------------------------------------------
+;  Macros for position-independent code (PIC) support
+;
+%ifndef GOT_SYMBOL
+%undef PIC
+%endif
+
+%ifdef PIC ; -------------------------------------------
+
+%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
+
+; At present, nasm doesn't seem to support PIC generation for Mach-O.
+; The PIC support code below is a little tricky.
+
+       SECTION SEG_CONST
+const_base:
+
+%define GOTOFF(got,sym) (got) + (sym) - const_base
+
+; get_GOT reg  (Mach-O variant)
+;   Loads the run-time address of const_base into 'reg', so that
+;   GOTOFF(reg,sym) addresses 'sym' relative to it.
+;   Steps:
+;     1. call/ret through %%geteip puts the return address (the address of
+;        the following 'add') into ecx; the 'add' then turns it into the
+;        address of %%ref.
+;     2. A 'lea' is assembled by hand: its opcode, ModRM and SIB bytes come
+;        from 'db' plus the E9 opcode byte of 'jmp near', and its 32-bit
+;        displacement is the jump's relative offset (const_base - %%ref).
+;        With ebp = 0 the result is %%ref + (const_base - %%ref).
+;   ebp is saved and restored around the sequence; ecx is not.
+;
+%imacro get_GOT        1
+       ; NOTE: this macro destroys ecx register.
+       call    %%geteip
+       add     ecx, byte (%%ref - $)
+       jmp     short %%adjust
+%%geteip:
+       mov     ecx, POINTER [esp]
+       ret
+%%adjust:
+       push    ebp
+       xor     ebp,ebp         ; ebp = 0
+%ifidni %1,ebx ; (%1 == ebx)
+       ; db 0x8D,0x9C + jmp near const_base =
+       ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
+       db      0x8D,0x9C               ; 8D,9C
+       jmp     near const_base         ; E9,(const_base-%%ref)
+%%ref:
+%else  ; (%1 != ebx)
+       ; db 0x8D,0x8C + jmp near const_base =
+       ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
+       db      0x8D,0x8C               ; 8D,8C
+       jmp     near const_base         ; E9,(const_base-%%ref)
+%%ref: mov     %1, ecx
+%endif ; (%1 == ebx)
+       pop     ebp
+%endmacro
+
+%else  ; GOT_SYMBOL != _MACHO_PIC_ ----------------
+
+%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
+
+; get_GOT reg  (variant for targets with a real GOT symbol)
+;   Loads the address of GOT_SYMBOL into 'reg'.  The call/ret pair through
+;   %%geteip copies the return address (the address of the 'add' below)
+;   into 'reg'; the 'add' then applies the ..gotpc-relative offset of
+;   GOT_SYMBOL, written relative to that same instruction ($).
+;   Only 'reg' is modified.
+;
+%imacro get_GOT        1
+       extern  GOT_SYMBOL
+       call    %%geteip
+       add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
+       jmp     short %%done
+%%geteip:
+       mov     %1, POINTER [esp]       ; reg = return address of the call
+       ret
+%%done:
+%endmacro
+
+%endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
+
+; PIC build: pushpic/poppic/movpic expand to a plain push/pop/mov.
+; The non-PIC branch of this %ifdef defines the same three names as empty
+; macros, so code written with them emits nothing when PIC is off.
+;
+%imacro pushpic        1.nolist
+       push    %1
+%endmacro
+%imacro poppic 1.nolist
+       pop     %1
+%endmacro
+%imacro movpic 2.nolist
+       mov     %1,%2
+%endmacro
+
+%else  ; !PIC -----------------------------------------
+
+%define GOTOFF(got,sym) (sym)
+
+; Non-PIC build: GOTOFF(got,sym) is just (sym), so no base register is
+; needed and all four helper macros expand to nothing.
+;
+%imacro get_GOT        1.nolist
+%endmacro
+%imacro pushpic        1.nolist
+%endmacro
+%imacro poppic 1.nolist
+%endmacro
+%imacro movpic 2.nolist
+%endmacro
+
+%endif ;  PIC -----------------------------------------
+
+; --------------------------------------------------------------------------
+;  Align the next instruction on {2,4,8,16,..}-byte boundary.
+;  ".balign n,,m" in GNU as
+;
+%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
+%define FILLB(b,n)  (($$-(b)) & ((n)-1))
+
+; alignx n [,max]
+;   Pads the code stream up to the next n-byte boundary with filler
+;   instructions, emitted as raw bytes (each 'db' line is annotated with
+;   the instruction it encodes).
+;
+;   MSKLE(x,y)  evaluates to a non-zero mask when x <= y and to 0 otherwise
+;               (operands are compared in their low 16 bits).
+;   FILLB(b,n)  is the number of bytes from position b to the next multiple
+;               of n, measured from the start of the section ($$).
+;
+;   %%bs marks where the padding starts, so MSKLE(FILLB(%%bs,%1),%2) tests
+;   the *total* padding against 'max' (default 0xFFFF); when the total
+;   exceeds 'max' every 'times' count becomes 0 and nothing is emitted.
+;   The FILLB($,%1) terms are the padding still remaining at each step:
+;   a gap of 16 bytes or more is filled entirely with one-byte nops, and a
+;   shorter gap is covered by progressively shorter fillers (7, 7, 6, 4, 3,
+;   2 and 1 bytes), each emitted at most once.
+;
+%imacro alignx 1-2.nolist 0xFFFF
+%%bs:  times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
+              db 0x90                               ; nop
+       times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
+              db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
+       times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
+              db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
+       times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
+              db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
+       times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
+              db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
+       times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
+              db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
+       times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
+              db 0x8B,0xED                          ; mov ebp,ebp
+       times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
+              db 0x90                               ; nop
+%endmacro
+
+; Align the next data on {2,4,8,16,..}-byte boundary.
+;
+; alignz n : n is the alignment in bytes; the gap is filled with zero bytes
+;            via NASM's own 'align' directive.
+%imacro alignz 1.nolist
+       align %1, db 0          ; filling zeros
+%endmacro
+
+; --------------------------------------------------------------------------
diff --git a/jsimdgcc.c b/jsimdgcc.c
new file mode 100644 (file)
index 0000000..d6ad75b
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * jsimdgcc.c - SIMD instruction support check (gcc)
+ *
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * Last Modified : January 24, 2006
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#include <setjmp.h>
+#include <signal.h>
+
+
+#ifdef __GNUC__                /* gcc (i386) */
+
+static volatile int lockf /* = 0 */;   /* spin lock: 0 = free, 1 = held */
+static sigjmp_buf jmpbuf;              /* where a failed probe resumes */
+
+
+/*
+ * Exception handler for signal()
+ *
+ * Runs when a probe instruction raises SIGILL: puts SIGILL back to its
+ * default action and jumps back to the sigsetjmp() call in
+ * jpeg_simd_os_support(), which then sees a non-zero return value.
+ *
+ * siglongjmp() is used together with sigsetjmp(jmpbuf, 1) so that the
+ * signal mask in effect before the fault is restored.  Leaving a handler
+ * with plain longjmp() can leave SIGILL blocked, in which case the next
+ * faulting probe would terminate the process instead of being caught.
+ */
+
+LOCAL(void)
+exception_handler (int sig)
+{
+  signal(SIGILL, SIG_DFL);
+  siglongjmp(jmpbuf, 1);
+}
+
+#endif /* __GNUC__ */
+
+
+/*
+ * Check if the OS supports SIMD instructions
+ *
+ * 'simd' is a set of JSIMD_* flags to be checked.  For each instruction
+ * family requested, one harmless instruction of that family is executed
+ * with a SIGILL handler installed; if it faults, the corresponding flags
+ * are removed.  The (possibly reduced) flag set is returned.
+ *
+ * When not compiled with gcc, no probing is done and 'simd' is returned
+ * unchanged.
+ *
+ * Side effect: SIGILL is left at SIG_DFL afterwards; a handler installed
+ * by the application beforehand is not restored.
+ */
+
+GLOBAL(unsigned int)
+jpeg_simd_os_support (unsigned int simd)
+{
+#ifdef __GNUC__                /* gcc (i386) */
+  unsigned int mxcsr = 0x1F80; /* default value of MXCSR register */
+
+  /* enter critical section:
+   * jmpbuf and the SIGILL disposition are process-wide, so only one
+   * thread at a time may run the probes.  Numeric local labels are used
+   * so the asm can be emitted more than once without symbol clashes, and
+   * the "memory" clobber keeps the compiler from moving memory accesses
+   * across the lock acquisition.
+   */
+  __asm__ __volatile__ (
+  "1:                         \n\t"
+    "movl  $1,%%eax           \n\t"
+    "xchgl %0,%%eax           \n\t"    /* try to get lock */
+    "cmpl  $0,%%eax           \n\t"    /* test if successful */
+    "je    3f                 \n"
+  "2:                         \n\t"
+  /*".byte 0xF3,0x90          \n\t"*/  /* "pause" on P4 (short delay) */
+    "cmpl  $0,%0              \n\t"    /* check if lock is free */
+    "jne   2b                 \n\t"
+    "jmp   1b                 \n"
+  "3:                         \n\t"
+     : "=m" (lockf) : "m" (lockf) : "%eax", "cc", "memory"
+  );
+
+  /* If floating point emulation is enabled (CR0.EM = 1),
+   * executing an MMX/3DNow! instruction generates invalid
+   * opcode exception (#UD).
+   */
+  if (simd & (JSIMD_MMX | JSIMD_3DNOW)) {
+    if (!sigsetjmp(jmpbuf, 1)) {
+      signal(SIGILL, exception_handler);
+      __asm__ __volatile__ (
+        ".byte 0x0F,0x77"              /* emms */
+      );
+      signal(SIGILL, SIG_DFL);
+    } else {
+      /* the probe faulted; the handler has already reset SIGILL */
+      simd &= ~(JSIMD_MMX | JSIMD_3DNOW);
+    }
+  }
+  if (simd & (JSIMD_SSE | JSIMD_SSE2)) {
+    if (!sigsetjmp(jmpbuf, 1)) {
+      signal(SIGILL, exception_handler);
+      __asm__ __volatile__ (
+        "leal  %0,%%eax        \n\t"
+        ".byte 0x0F,0xAE,0x10  \n\t"   /* ldmxcsr [eax] */
+         : : "m" (mxcsr) : "%eax"
+      );
+      signal(SIGILL, SIG_DFL);
+    } else {
+      /* the probe faulted; the handler has already reset SIGILL */
+      simd &= ~(JSIMD_SSE | JSIMD_SSE2);
+    }
+  }
+
+  /* leave critical section */
+  lockf = 0;   /* release lock */
+#endif /* __GNUC__ */
+
+  return simd;
+}
diff --git a/jsimdw32.asm b/jsimdw32.asm
new file mode 100644 (file)
index 0000000..7f2cdbc
--- /dev/null
@@ -0,0 +1,121 @@
+;
+; jsimdw32.asm - SIMD instruction support check (for Win32)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : September 26, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+       SECTION SEG_TEXT
+       BITS    32
+;
+; Check if the OS supports SIMD instructions (Win32)
+;
+; Reference: "Win32 Exception handling for assembler programmers"
+;               http://www.jorgon.freeserve.co.uk/Except/Except.htm
+;
+; GLOBAL(unsigned int)
+; jpeg_simd_os_support (unsigned int simd)
+;
+
+%define simd   ebp+8                   ; unsigned int simd
+%define mxcsr  ebp-4                   ; unsigned int mxcsr = 0x1F80
+
+       align   16
+       global  EXTN(jpeg_simd_os_support)
+
+; Probes, one instruction per family, whether MMX/3DNow! and SSE/SSE2
+; instructions can be executed, and returns in eax the subset of the
+; requested 'simd' flags that survived.
+;
+; An exception registration record {prev_record_ptr, exception_handler} is
+; built on the stack and linked in through fs:[0] for the duration of the
+; probes.  Before each probe, cl holds the flags under test and dl the
+; length in bytes of the probe instruction; exception_handler (below)
+; reads both from the saved context, clears the flags in the saved eax and
+; steps the saved eip over the instruction.
+;
+EXTN(jpeg_simd_os_support):
+       push    ebp
+       mov     ebp,esp
+       push    dword 0x1F80            ; default value of MXCSR register
+       push    exception_handler
+       push    POINTER [fs:0]          ; prev_record_ptr
+       mov     POINTER [fs:0], esp     ; this_record_ptr
+
+       mov     eax, DWORD [simd]
+       and     eax, byte JSIMD_ALL     ; keep only the known flag bits
+       xor     ecx,ecx                 ; upper bits of ecx/edx must be zero:
+       xor     edx,edx                 ;  the handler uses the full registers
+
+       ; If floating point emulation is enabled (CR0.EM = 1),
+       ; executing an MMX/3DNow! instruction generates invalid
+       ; opcode exception (#UD).
+
+       mov     cl, (JSIMD_MMX | JSIMD_3DNOW)
+       mov     dl, (.mmx_1 - .mmx_0)
+       test    al,cl
+       jz      short .mmx_1            ; not requested: skip the probe
+.mmx_0:        emms                            ; executing MMX instruction
+.mmx_1:
+       mov     cl, (JSIMD_SSE | JSIMD_SSE2)
+       mov     dl, (.sse_1 - .sse_0)
+       test    al,cl
+       jz      short .sse_1            ; not requested: skip the probe
+.sse_0:        ldmxcsr DWORD [mxcsr]           ; executing SSE instruction
+.sse_1:
+
+       pop     POINTER [fs:0]          ; prev_record_ptr
+       mov     esp,ebp                 ; drops the handler pointer and mxcsr
+       pop     ebp
+       ret
+
+; --------------------------------------------------------------------------
+;
+; LOCAL(EXCEPTION_DISPOSITION)
+; exception_handler (struct _EXCEPTION_RECORD * ExceptionRecord,
+;                    void * EstablisherFrame, struct _CONTEXT * ContextRecord,
+;                    void * DispatcherContext);
+;
+
+%define ExceptionContinueExecution  0  ; from <excpt.h>
+%define ExceptionContinueSearch     1  ; typedef enum _EXCEPTION_DISPOSITION {
+%define ExceptionNestedException    2  ;   ...
+%define ExceptionCollidedUnwind     3  ; } EXCEPTION_DISPOSITION
+
+%define EXCEPTION_ILLEGAL_INSTRUCTION   0xC000001D     ; from <winbase.h>
+
+%define ExceptionRecord     esp+4      ; struct _EXCEPTION_RECORD *
+%define EstablisherFrame    esp+8      ; void * EstablisherFrame
+%define ContextRecord       esp+12     ; struct _CONTEXT * ContextRecord
+%define DispatcherContext   esp+16     ; void * DispatcherContext
+
+%define ExceptionCode(b)    (b)+0      ; ExceptionRecord->ExceptionCode
+%define ExceptionFlags(b)   (b)+4      ; ExceptionRecord->ExceptionFlags
+%define Context_Edx(b)      (b)+168    ; ContextRecord->Edx
+%define Context_Ecx(b)      (b)+172    ; ContextRecord->Ecx
+%define Context_Eax(b)      (b)+176    ; ContextRecord->Eax
+%define Context_Eip(b)      (b)+184    ; ContextRecord->Eip
+
+       align   16
+
+; Exception handler used while jpeg_simd_os_support runs its probes.
+; Returns ExceptionContinueSearch unless ExceptionFlags is zero and the
+; code is EXCEPTION_ILLEGAL_INSTRUCTION.  In that case it edits the saved
+; context - Eax &= ~Ecx (drop the flags under test) and Eip += Edx (skip
+; the probe instruction) - and returns ExceptionContinueExecution.
+;
+exception_handler:
+       mov     edx, POINTER [ExceptionRecord]
+       mov     eax, ExceptionContinueSearch    ; default return value
+
+       cmp     DWORD [ExceptionFlags(edx)], byte 0
+       jne     short .return                   ; noncontinuable exception
+       cmp     DWORD [ExceptionCode(edx)], EXCEPTION_ILLEGAL_INSTRUCTION
+       jne     short .return                   ; not a #UD exception
+
+       mov     eax, POINTER [ContextRecord]
+       mov     ecx, DWORD [Context_Ecx(eax)]   ; flags that were being tested
+       mov     edx, DWORD [Context_Edx(eax)]   ; length of the probe instruction
+       not     ecx
+       add     DWORD [Context_Eip(eax)], edx   ; next instruction
+       and     DWORD [Context_Eax(eax)], ecx   ; turn off flag
+       mov     eax, ExceptionContinueExecution
+.return:
+       ret
+
diff --git a/libjpeg.spec b/libjpeg.spec
new file mode 100644 (file)
index 0000000..2c9c224
--- /dev/null
@@ -0,0 +1,234 @@
+%define LIBVER 62.1.0
+Summary: A library for manipulating JPEG image format files (with SIMD support)
+Summary(ja): JPEG 形式画像ファイルを扱う為のライブラリ (x86 SIMD 対応版)
+Name: libjpeg
+Version: 6bx1.02
+Release: 1
+License: distributable
+Group: System Environment/Libraries
+Source0: http://cetus.sakura.ne.jp/softlab/jpeg-x86simd/sources/jpegsrc-6b-x86simd-1.02.tar.bz2
+Buildroot: %{_tmppath}/%{name}-%{version}-root
+ExclusiveArch: %{ix86}
+BuildPrereq: nasm >= 0.98.25
+
+%package devel
+Summary: Development tools for programs which will use the libjpeg library.
+Summary(ja): libjpeg ライブラリを使うプログラム向け開発ツール
+Group: Development/Libraries
+Requires: libjpeg = %{version}-%{release}
+
+%description
+The libjpeg package contains a library of functions for manipulating
+JPEG images, as well as simple client programs for accessing the
+libjpeg functions.  Libjpeg client programs include cjpeg, djpeg,
+jpegtran, rdjpgcom and wrjpgcom.  Cjpeg compresses an image file into
+JPEG format.  Djpeg decompresses a JPEG file into a regular image
+file.  Jpegtran can perform various useful transformations on JPEG
+files.  Rdjpgcom displays any text comments included in a JPEG file.
+Wrjpgcom inserts text comments into a JPEG file.
+
+The libjpeg library in this package uses SIMD instructions if available.
+On a processor that supports SIMD instructions (MMX, SSE, etc),
+it runs 2-3 times faster than the original version of libjpeg.
+
+%description -l ja
+libjpeg パッケージには JPEG 画像を扱う為に必要なライブラリと，
+libjpeg 関数にアクセスする為の簡単なクライアントプログラムが
+収められています．libjpeg クライアントプログラムには cjpeg, djpeg,
+jpegtran, rdjpgcom, wrjpgcom があります．cjpeg は画像ファイルを
+JPEG 形式に圧縮します．djpeg は JPEG ファイルを通常の画像ファイルに
+展開します．jpegtran は JPEG ファイルに様々な変換を施すことが出来ます．
+rdjpgcom は JPEG ファイルに含まれているテキスト形式のコメントを表示し，
+wrjpgcom は JPEG ファイルにテキスト形式のコメントを追加します．
+
+このパッケージに収められている libjpeg ライブラリは、x86 SIMD 対応版です。
+MMX や SSE などの SIMD 演算機能を装備しているプロセッサ上で動作させると、
+オリジナル版の libjpeg ライブラリと比較して 2〜3倍程度の速度で動作します。
+
+%description devel
+The libjpeg-devel package includes the header files and static libraries
+necessary for developing programs which will manipulate JPEG files using
+the libjpeg library.
+
+If you are going to develop programs which will manipulate JPEG images,
+you should install libjpeg-devel.  You'll also need to have the libjpeg
+package installed.
+
+%description devel -l ja
+libjpeg-devel パッケージには，libjpeg ライブラリを使って JPEG ファイルを
+扱うプログラムを開発するのに必要なヘッダファイルとスタティックライブラリが
+収められています．
+
+JPEG 画像を扱うプログラムを開発する際には，libjpeg-devel を
+インストールして下さい．同時に libjpeg パッケージもインストールする
+必要があります．
+
+%prep
+%setup -q -n jpeg-6bx
+# suppress "libtoolize --copy --force"
+mv configure.in configure.in_
+
+%build
+%configure --enable-shared --enable-static
+
+make libdir=%{_libdir} %{?_smp_mflags}
+LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD make test
+
+%install
+rm -rf $RPM_BUILD_ROOT
+
+%makeinstall
+#strip -R .comment $RPM_BUILD_ROOT/usr/bin/* || :
+#/sbin/ldconfig -n $RPM_BUILD_ROOT/%{_libdir}
+
+%post -p /sbin/ldconfig
+
+%postun -p /sbin/ldconfig
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%doc usage.doc wizard.doc README
+%{_libdir}/libjpeg.so.*
+%{_bindir}/*
+%{_mandir}/*/*
+
+%files devel
+%defattr(-,root,root)
+%doc libjpeg.doc coderules.doc structure.doc example.c
+%doc simd_*.txt
+%{_libdir}/*.a
+%{_libdir}/*.la
+%{_libdir}/*.so
+/usr/include/*.h
+
+%changelog
+* Sat Feb 04 2006 MIYASAKA Masaru <alkaid@coral.ocn.ne.jp> - 6bx1.02-1
+- upgraded to 6bx1.02
+
+* Thu Jan 26 2006 MIYASAKA Masaru <alkaid@coral.ocn.ne.jp> - 6bx1.01-1
+- upgraded to 6bx1.01
+
+* Thu Mar 24 2005 MIYASAKA Masaru <alkaid@coral.ocn.ne.jp> - 6bx1.0-1
+- based on 6b-33 from Fedora Core 3 and modified for SIMD-extended libjpeg
+- added Japanese summary and description, which is delivered from Vine Linux
+- moved wizard.doc to main package
+
+* Thu Oct  7 2004 Matthias Clasen <mclasen@redhat.com> - 6b-33
+- Add URL.  (#134791)
+
+* Tue Jun 15 2004 Elliot Lee <sopwith@redhat.com>
+- rebuilt
+
+* Tue Mar 02 2004 Elliot Lee <sopwith@redhat.com>
+- rebuilt
+
+* Fri Feb 13 2004 Elliot Lee <sopwith@redhat.com>
+- rebuilt
+
+* Thu Sep 25 2003 Jeremy Katz <katzj@redhat.com> 6b-30
+- rebuild to fix gzipped file md5sums (#91211)
+
+* Tue Sep 23 2003 Florian La Roche <Florian.LaRoche@redhat.de>
+- do not set rpath
+
+* Wed Jun 04 2003 Elliot Lee <sopwith@redhat.com>
+- rebuilt
+
+* Thu Feb 13 2003 Elliot Lee <sopwith@redhat.com> 6b-27
+- Add libjpeg-shared.patch to fix shlibs on powerpc
+
+* Tue Feb 04 2003 Florian La Roche <Florian.LaRoche@redhat.de>
+- add symlink to shared lib
+
+* Wed Jan 22 2003 Tim Powers <timp@redhat.com>
+- rebuilt
+
+* Mon Jan  6 2003 Jonathan Blandford <jrb@redhat.com>
+- add docs, #76508
+
+* Fri Dec 13 2002 Elliot Lee <sopwith@redhat.com> 6b-23
+- Merge in multilib changes
+- _smp_mflags
+
+* Tue Sep 10 2002 Than Ngo <than@redhat.com> 6b-22
+- use %%_libdir
+
+* Fri Jun 21 2002 Tim Powers <timp@redhat.com>
+- automated rebuild
+
+* Thu May 23 2002 Tim Powers <timp@redhat.com>
+- automated rebuild
+
+* Thu Jan 31 2002 Bernhard Rosenkraenzer <bero@redhat.com> 6b-19
+- Fix bug #59011
+
+* Mon Jan 28 2002 Bernhard Rosenkraenzer <bero@redhat.com> 6b-18
+- Fix bug #58982
+
+* Wed Jan 09 2002 Tim Powers <timp@redhat.com>
+- automated rebuild
+
+* Tue Jul 24 2001 Bill Nottingham <notting@redhat.com>
+- require libjpeg = %%{version}
+
+* Sun Jun 24 2001 Elliot Lee <sopwith@redhat.com>
+- Bump release + rebuild.
+
+* Mon Dec 11 2000 Than Ngo <than@redhat.com>
+- rebuilt with the fixed fileutils
+- use %%{_tmppath}
+
+* Wed Nov  8 2000 Bernhard Rosenkraenzer <bero@redhat.com>
+- fix a typo (strip -R .comment, not .comments)
+
+* Thu Jul 13 2000 Prospector <bugzilla@redhat.com>
+- automatic rebuild
+
+* Sat Jun 17 2000 Bernhard Rosenkraenzer <bero@redhat.com>
+- FHSify
+- add some C++ tweaks to the headers as suggested by bug #9822)
+
+* Wed May  5 2000 Bill Nottingham <notting@redhat.com>
+- configure tweaks for ia64; remove alpha patch (it's pointless)
+
+* Sat Feb  5 2000 Bernhard Rosenkränzer <bero@redhat.com>
+- rebuild to get compressed man pages
+- fix description
+- some minor tweaks to the spec file
+- add docs
+- fix build on alpha (alphaev6 stuff)
+
+* Sun Mar 21 1999 Cristian Gafton <gafton@redhat.com> 
+- auto rebuild in the new build environment (release 9)
+
+* Wed Jan 13 1999 Cristian Gafton <gafton@redhat.com>
+- patch to build on arm
+- build for glibc 2.1
+
+* Mon Oct 12 1998 Cristian Gafton <gafton@redhat.com>
+- strip binaries
+
+* Mon Aug  3 1998 Jeff Johnson <jbj@redhat.com>
+- fix buildroot problem.
+
+* Tue Jun 09 1998 Prospector System <bugs@redhat.com>
+- translations modified for de
+
+* Thu Jun 04 1998 Marc Ewing <marc@redhat.com>
+- up to release 4
+- remove patch that set (improper) soname - libjpeg now does it itself
+
+* Thu May 07 1998 Prospector System <bugs@redhat.com>
+- translations modified for de, fr, tr
+
+* Fri May 01 1998 Cristian Gafton <gafton@redhat.com>
+- fixed build on manhattan
+
+* Wed Apr 08 1998 Cristian Gafton <gafton@redhat.com>
+- upgraded to version 6b
+
+* Wed Oct 08 1997 Donnie Barnes <djb@redhat.com>
+- new package to remove jpeg stuff from libgr and put in it's own package
diff --git a/ltconfig b/ltconfig
deleted file mode 100755 (executable)
index 2347e69..0000000
--- a/ltconfig
+++ /dev/null
@@ -1,1512 +0,0 @@
-#! /bin/sh
-
-# ltconfig - Create a system-specific libtool.
-# Copyright (C) 1996-1998 Free Software Foundation, Inc.
-# Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
-#
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# A lot of this script is taken from autoconf-2.10.
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-if test "${CDPATH+set}" = set; then CDPATH=; export CDPATH; fi
-
-echo=echo
-if test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then :
-else
-  # The Solaris and AIX default echo program unquotes backslashes.
-  # This makes it impossible to quote backslashes using
-  #   echo "$something" | sed 's/\\/\\\\/g'
-  # So, we emulate echo with printf '%s\n'
-  echo="printf %s\\n"
-  if test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then :
-  else
-    # Oops.  We have no working printf.  Try to find a not-so-buggy echo.
-    echo=echo
-    IFS="${IFS=        }"; save_ifs="$IFS"; IFS="${IFS}:"
-    for dir in $PATH /usr/ucb; do
-      if test -f $dir/echo && test "X`$dir/echo '\t'`" = 'X\t'; then
-        echo="$dir/echo"
-        break
-      fi
-    done
-    IFS="$save_ifs"
-  fi
-fi
-
-# Sed substitution that helps us do robust quoting.  It backslashifies
-# metacharacters that are still active within double-quoted strings.
-Xsed='sed -e s/^X//'
-sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'
-
-# Same as above, but do not quote variable references.
-double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'
-
-# The name of this program.
-progname=`$echo "X$0" | $Xsed -e 's%^.*/%%'`
-
-# Constants:
-PROGRAM=ltconfig
-PACKAGE=libtool
-VERSION=1.2
-ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c 1>&5'
-ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.c $LIBS 1>&5'
-rm="rm -f"
-
-help="Try \`$progname --help' for more information."
-
-# Global variables:
-can_build_shared=yes
-enable_shared=yes
-# All known linkers require a `.a' archive for static linking.
-enable_static=yes
-ltmain=
-silent=
-srcdir=
-ac_config_guess=
-ac_config_sub=
-host=
-nonopt=
-verify_host=yes
-with_gcc=no
-with_gnu_ld=no
-
-old_AR="$AR"
-old_CC="$CC"
-old_CFLAGS="$CFLAGS"
-old_CPPFLAGS="$CPPFLAGS"
-old_LD="$LD"
-old_LN_S="$LN_S"
-old_NM="$NM"
-old_RANLIB="$RANLIB"
-
-# Parse the command line options.
-args=
-prev=
-for option
-do
-  case "$option" in
-  -*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
-  *) optarg= ;;
-  esac
-
-  # If the previous option needs an argument, assign it.
-  if test -n "$prev"; then
-    eval "$prev=\$option"
-    prev=
-    continue
-  fi
-
-  case "$option" in
-  --help) cat <<EOM
-Usage: $progname [OPTION]... LTMAIN [HOST]
-
-Generate a system-specific libtool script.
-
-    --disable-shared       do not build shared libraries
-    --disable-static       do not build static libraries
-    --help                 display this help and exit
-    --no-verify            do not verify that HOST is a valid host type
-    --quiet                same as \`--silent'
-    --silent               do not print informational messages
-    --srcdir=DIR           find \`config.guess' in DIR
-    --version              output version information and exit
-    --with-gcc             assume that the GNU C compiler will be used
-    --with-gnu-ld          assume that the C compiler uses the GNU linker
-
-LTMAIN is the \`ltmain.sh' shell script fragment that provides basic libtool
-functionality.
-
-HOST is the canonical host system name [default=guessed].
-EOM
-  exit 0
-  ;;
-
-  --disable-shared) enable_shared=no ;;
-
-  --disable-static) enable_static=no ;;
-
-  --quiet | --silent) silent=yes ;;
-
-  --srcdir) prev=srcdir ;;
-  --srcdir=*) srcdir="$optarg" ;;
-
-  --no-verify) verify_host=no ;;
-
-  --version) echo "$PROGRAM (GNU $PACKAGE) $VERSION"; exit 0 ;;
-
-  --with-gcc) with_gcc=yes ;;
-  --with-gnu-ld) with_gnu_ld=yes ;;
-
-  -*)
-    echo "$progname: unrecognized option \`$option'" 1>&2
-    echo "$help" 1>&2
-    exit 1
-    ;;
-
-  *)
-    if test -z "$ltmain"; then
-      ltmain="$option"
-    elif test -z "$host"; then
-# This generates an unnecessary warning for sparc-sun-solaris4.1.3_U1
-#      if test -n "`echo $option| sed 's/[-a-z0-9.]//g'`"; then
-#        echo "$progname: warning \`$option' is not a valid host type" 1>&2
-#      fi
-      host="$option"
-    else
-      echo "$progname: too many arguments" 1>&2
-      echo "$help" 1>&2
-      exit 1
-    fi ;;
-  esac
-done
-
-if test -z "$ltmain"; then
-  echo "$progname: you must specify a LTMAIN file" 1>&2
-  echo "$help" 1>&2
-  exit 1
-fi
-
-if test -f "$ltmain"; then :
-else
-  echo "$progname: \`$ltmain' does not exist" 1>&2
-  echo "$help" 1>&2
-  exit 1
-fi
-
-# Quote any args containing shell metacharacters.
-ltconfig_args=
-for arg
-do
-  case "$arg" in
-  *" "*|*"     "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*)
-  ltconfig_args="$ltconfig_args '$arg'" ;;
-  *) ltconfig_args="$ltconfig_args $arg" ;;
-  esac
-done
-
-# A relevant subset of AC_INIT.
-
-# File descriptor usage:
-# 0 standard input
-# 1 file creation
-# 2 errors and warnings
-# 3 some systems may open it to /dev/tty
-# 4 used on the Kubota Titan
-# 5 compiler messages saved in config.log
-# 6 checking for... messages and results
-if test "$silent" = yes; then
-  exec 6>/dev/null
-else
-  exec 6>&1
-fi
-exec 5>>./config.log
-
-# NLS nuisances.
-# Only set LANG and LC_ALL to C if already set.
-# These must not be set unconditionally because not all systems understand
-# e.g. LANG=C (notably SCO).
-if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
-if test "${LANG+set}"   = set; then LANG=C;   export LANG;   fi
-
-if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
-  # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu.
-  if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
-    ac_n= ac_c='
-' ac_t='       '
-  else
-    ac_n=-n ac_c= ac_t=
-  fi
-else
-  ac_n= ac_c='\c' ac_t=
-fi
-
-if test -z "$srcdir"; then
-  # Assume the source directory is the same one as the path to ltmain.sh.
-  srcdir=`$echo "$ltmain" | $Xsed -e 's%/[^/]*$%%'`
-  test "$srcdir" = "$ltmain" && srcdir=.
-fi
-
-trap "$rm conftest*; exit 1" 1 2 15
-if test "$verify_host" = yes; then
-  # Check for config.guess and config.sub.
-  ac_aux_dir=
-  for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
-    if test -f $ac_dir/config.guess; then
-      ac_aux_dir=$ac_dir
-      break
-    fi
-  done
-  if test -z "$ac_aux_dir"; then
-    echo "$progname: cannot find config.guess in $srcdir $srcdir/.. $srcdir/../.." 1>&2
-    echo "$help" 1>&2
-    exit 1
-  fi
-  ac_config_guess=$ac_aux_dir/config.guess
-  ac_config_sub=$ac_aux_dir/config.sub
-
-  # Make sure we can run config.sub.
-  if $ac_config_sub sun4 >/dev/null 2>&1; then :
-  else
-    echo "$progname: cannot run $ac_config_sub" 1>&2
-    echo "$help" 1>&2
-    exit 1
-  fi
-
-  echo $ac_n "checking host system type""... $ac_c" 1>&6
-
-  host_alias=$host
-  case "$host_alias" in
-  "")
-    if host_alias=`$ac_config_guess`; then :
-    else
-      echo "$progname: cannot guess host type; you must specify one" 1>&2
-      echo "$help" 1>&2
-      exit 1
-    fi ;;
-  esac
-  host=`$ac_config_sub $host_alias`
-  echo "$ac_t$host" 1>&6
-
-  # Make sure the host verified.
-  test -z "$host" && exit 1
-
-elif test -z "$host"; then
-  echo "$progname: you must specify a host type if you use \`--no-verify'" 1>&2
-  echo "$help" 1>&2
-  exit 1
-else
-  host_alias=$host
-fi
-
-# Transform linux* to *-*-linux-gnu*, to support old configure scripts.
-case "$host_os" in
-linux-gnu*) ;;
-linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
-esac
-
-host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
-host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
-host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
-
-case "$host_os" in
-aix3*)
-  # AIX sometimes has problems with the GCC collect2 program.  For some
-  # reason, if we set the COLLECT_NAMES environment variable, the problems
-  # vanish in a puff of smoke.
-  if test "${COLLECT_NAMES+set}" != set; then
-    COLLECT_NAMES=
-    export COLLECT_NAMES
-  fi
-  ;;
-esac
-
-# Determine commands to create old-style static archives.
-old_archive_cmds='$AR cru $oldlib$oldobjs'
-old_postinstall_cmds='chmod 644 $oldlib'
-old_postuninstall_cmds=
-
-# Set a sane default for `AR'.
-test -z "$AR" && AR=ar
-
-# If RANLIB is not set, then run the test.
-if test "${RANLIB+set}" != "set"; then
-  result=no
-
-  echo $ac_n "checking for ranlib... $ac_c" 1>&6
-  IFS="${IFS=  }"; save_ifs="$IFS"; IFS="${IFS}:"
-  for dir in $PATH; do
-    test -z "$dir" && dir=.
-    if test -f $dir/ranlib; then
-      RANLIB="ranlib"
-      result="ranlib"
-      break
-    fi
-  done
-  IFS="$save_ifs"
-
-  echo "$ac_t$result" 1>&6
-fi
-
-if test -n "$RANLIB"; then
-  old_archive_cmds="$old_archive_cmds;\$RANLIB \$oldlib"
-  old_postinstall_cmds="\$RANLIB \$oldlib;$old_postinstall_cmds"
-fi
-
-# Check to see if we are using GCC.
-if test "$with_gcc" != yes || test -z "$CC"; then
-  # If CC is not set, then try to find GCC or a usable CC.
-  if test -z "$CC"; then
-    echo $ac_n "checking for gcc... $ac_c" 1>&6
-    IFS="${IFS=        }"; save_ifs="$IFS"; IFS="${IFS}:"
-    for dir in $PATH; do
-      IFS="$save_ifs"
-      test -z "$dir" && dir=.
-      if test -f $dir/gcc; then
-       CC="gcc"
-       break
-      fi
-    done
-    IFS="$save_ifs"
-
-    if test -n "$CC"; then
-      echo "$ac_t$CC" 1>&6
-    else
-      echo "$ac_t"no 1>&6
-    fi
-  fi
-
-  # Not "gcc", so try "cc", rejecting "/usr/ucb/cc".
-  if test -z "$CC"; then
-    echo $ac_n "checking for cc... $ac_c" 1>&6
-    IFS="${IFS=        }"; save_ifs="$IFS"; IFS="${IFS}:"
-    cc_rejected=no
-    for dir in $PATH; do
-      test -z "$dir" && dir=.
-      if test -f $dir/cc; then
-       if test "$dir/cc" = "/usr/ucb/cc"; then
-         cc_rejected=yes
-         continue
-       fi
-       CC="cc"
-       break
-      fi
-    done
-    IFS="$save_ifs"
-    if test $cc_rejected = yes; then
-      # We found a bogon in the path, so make sure we never use it.
-      set dummy $CC
-      shift
-      if test $# -gt 0; then
-       # We chose a different compiler from the bogus one.
-       # However, it has the same name, so the bogon will be chosen
-       # first if we set CC to just the name; use the full file name.
-       shift
-       set dummy "$dir/cc" "$@"
-       shift
-       CC="$@"
-      fi
-    fi
-
-    if test -n "$CC"; then
-      echo "$ac_t$CC" 1>&6
-    else
-      echo "$ac_t"no 1>&6
-    fi
-
-    if test -z "$CC"; then
-      echo "$progname: error: no acceptable cc found in \$PATH" 1>&2
-      exit 1
-    fi
-  fi
-
-  # Now see if the compiler is really GCC.
-  with_gcc=no
-  echo $ac_n "checking whether we are using GNU C... $ac_c" 1>&6
-  echo "$progname:424: checking whether we are using GNU C" >&5
-
-  $rm conftest.c
-  cat > conftest.c <<EOF
-#ifdef __GNUC__
-  yes;
-#endif
-EOF
-  if { ac_try='${CC-cc} -E conftest.c'; { (eval echo $progname:432: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
-    with_gcc=yes
-  fi
-  $rm conftest.c
-  echo "$ac_t$with_gcc" 1>&6
-fi
-
-# Allow CC to be a program name with arguments.
-set dummy $CC
-compiler="$2"
-
-echo $ac_n "checking for $compiler option to produce PIC... $ac_c" 1>&6
-pic_flag=
-special_shlib_compile_flags=
-wl=
-link_static_flag=
-no_builtin_flag=
-
-if test "$with_gcc" = yes; then
-  wl='-Wl,'
-  link_static_flag='-static'
-  no_builtin_flag=' -fno-builtin'
-
-  case "$host_os" in
-  aix3* | aix4* | irix5* | irix6* | osf3* | osf4*)
-    # PIC is the default for these OSes.
-    ;;
-  os2*)
-    # We can build DLLs from non-PIC.
-    ;;
-  amigaos*)
-    # FIXME: we need at least 68020 code to build shared libraries, but
-    # adding the `-m68020' flag to GCC prevents building anything better,
-    # like `-m68040'.
-    pic_flag='-m68020 -resident32 -malways-restore-a4'
-    ;;
-  *)
-    pic_flag='-fPIC'
-    ;;
-  esac
-else
-  # PORTME Check for PIC flags for the system compiler.
-  case "$host_os" in
-  aix3* | aix4*)
-    # All AIX code is PIC.
-    link_static_flag='-bnso -bI:/lib/syscalls.exp'
-    ;;
-
-  hpux9* | hpux10*)
-    # Is there a better link_static_flag that works with the bundled CC?
-    wl='-Wl,'
-    link_static_flag="${wl}-a ${wl}archive"
-    pic_flag='+Z'
-    ;;
-
-  irix5* | irix6*)
-    wl='-Wl,'
-    link_static_flag='-non_shared'
-    # PIC (with -KPIC) is the default.
-    ;;
-
-  os2*)
-    # We can build DLLs from non-PIC.
-    ;;
-
-  osf3* | osf4*)
-    # All OSF/1 code is PIC.
-    wl='-Wl,'
-    link_static_flag='-non_shared'
-    ;;
-
-  sco3.2v5*)
-    pic_flag='-Kpic'
-    link_static_flag='-dn'
-    special_shlib_compile_flags='-belf'
-    ;;
-
-  solaris2*)
-    pic_flag='-KPIC'
-    link_static_flag='-Bstatic'
-    wl='-Wl,'
-    ;;
-
-  sunos4*)
-    pic_flag='-PIC'
-    link_static_flag='-Bstatic'
-    wl='-Qoption ld '
-    ;;
-
-  sysv4.2uw2*)
-    pic_flag='-KPIC'
-    link_static_flag='-Bstatic'
-    wl='-Wl,'
-    ;;
-
-  uts4*)
-    pic_flag='-pic'
-    link_static_flag='-Bstatic'
-    ;;
-
-  *)
-    can_build_shared=no
-    ;;
-  esac
-fi
-
-if test -n "$pic_flag"; then
-  echo "$ac_t$pic_flag" 1>&6
-
-  # Check to make sure the pic_flag actually works.
-  echo $ac_n "checking if $compiler PIC flag $pic_flag works... $ac_c" 1>&6
-  $rm conftest*
-  echo > conftest.c
-  save_CFLAGS="$CFLAGS"
-  CFLAGS="$CFLAGS $pic_flag -DPIC"
-  echo "$progname:547: checking if $compiler PIC flag $pic_flag works" >&5
-  if { (eval echo $progname:548: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.o; then
-    # Append any warnings to the config.log.
-    cat conftest.err 1>&5
-
-    # On HP-UX, both CC and GCC only warn that PIC is supported... then they
-    # create non-PIC objects.  So, if there were any warnings, we assume that
-    # PIC is not supported.
-    if test -s conftest.err; then
-      echo "$ac_t"no 1>&6
-      can_build_shared=no
-      pic_flag=
-    else
-      echo "$ac_t"yes 1>&6
-      pic_flag=" $pic_flag"
-    fi
-  else
-    # Append any errors to the config.log.
-    cat conftest.err 1>&5
-    can_build_shared=no
-    pic_flag=
-    echo "$ac_t"no 1>&6
-  fi
-  CFLAGS="$save_CFLAGS"
-  $rm conftest*
-else
-  echo "$ac_t"none 1>&6
-fi
-
-# Check for any special shared library compilation flags.
-if test -n "$special_shlib_compile_flags"; then
-  echo "$progname: warning: \`$CC' requires \`$special_shlib_compile_flags' to build shared libraries" 1>&2
-  if echo "$old_CC $old_CFLAGS " | egrep -e "[         ]$special_shlib_compile_flags[  ]" >/dev/null; then :
-  else
-    echo "$progname: add \`$special_shlib_compile_flags' to the CC or CFLAGS env variable and reconfigure" 1>&2
-    can_build_shared=no
-  fi
-fi
-
-echo $ac_n "checking if $compiler static flag $link_static_flag works... $ac_c" 1>&6
-$rm conftest*
-echo 'main(){return(0);}' > conftest.c
-save_LDFLAGS="$LDFLAGS"
-LDFLAGS="$LDFLAGS $link_static_flag"
-echo "$progname:591: checking if $compiler static flag $link_static_flag works" >&5
-if { (eval echo $progname:592: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-  echo "$ac_t$link_static_flag" 1>&6
-else
-  echo "$ac_t"none 1>&6
-  link_static_flag=
-fi
-LDFLAGS="$save_LDFLAGS"
-$rm conftest*
-
-if test -z "$LN_S"; then
-  # Check to see if we can use ln -s, or we need hard links.
-  echo $ac_n "checking whether ln -s works... $ac_c" 1>&6
-  $rm conftestdata
-  if ln -s X conftestdata 2>/dev/null; then
-    $rm conftestdata
-    LN_S="ln -s"
-  else
-    LN_S=ln
-  fi
-  if test "$LN_S" = "ln -s"; then
-    echo "$ac_t"yes 1>&6
-  else
-    echo "$ac_t"no 1>&6
-  fi
-fi
-
-# Make sure LD is an absolute path.
-if test -z "$LD"; then
-  ac_prog=ld
-  if test "$with_gcc" = yes; then
-    # Check if gcc -print-prog-name=ld gives a path.
-    echo $ac_n "checking for ld used by GCC... $ac_c" 1>&6
-    echo "$progname:624: checking for ld used by GCC" >&5
-    ac_prog=`($CC -print-prog-name=ld) 2>&5`
-    case "$ac_prog" in
-    # Accept absolute paths.
-    /* | [A-Za-z]:\\*)
-      test -z "$LD" && LD="$ac_prog"
-      ;;
-    "")
-      # If it fails, then pretend we are not using GCC.
-      ac_prog=ld
-      ;;
-    *)
-      # If it is relative, then search for the first ld in PATH.
-      with_gnu_ld=unknown
-      ;;
-    esac
-  elif test "$with_gnu_ld" = yes; then
-    echo $ac_n "checking for GNU ld... $ac_c" 1>&6
-    echo "$progname:642: checking for GNU ld" >&5
-  else
-    echo $ac_n "checking for non-GNU ld""... $ac_c" 1>&6
-    echo "$progname:645: checking for non-GNU ld" >&5
-  fi
-
-  if test -z "$LD"; then
-    IFS="${IFS=        }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-    for ac_dir in $PATH; do
-      test -z "$ac_dir" && ac_dir=.
-      if test -f "$ac_dir/$ac_prog"; then
-       LD="$ac_dir/$ac_prog"
-       # Check to see if the program is GNU ld.  I'd rather use --version,
-       # but apparently some GNU ld's only accept -v.
-       # Break only if it was the GNU/non-GNU ld that we prefer.
-       if "$LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
-         test "$with_gnu_ld" != no && break
-       else
-         test "$with_gnu_ld" != yes && break
-       fi
-      fi
-    done
-    IFS="$ac_save_ifs"
-  fi
-
-  if test -n "$LD"; then
-    echo "$ac_t$LD" 1>&6
-  else
-    echo "$ac_t"no 1>&6
-  fi
-
-  if test -z "$LD"; then
-    echo "$progname: error: no acceptable ld found in \$PATH" 1>&2
-    exit 1
-  fi
-fi
-
-# Check to see if it really is or is not GNU ld.
-echo $ac_n "checking if the linker ($LD) is GNU ld... $ac_c" 1>&6
-# I'd rather use --version here, but apparently some GNU ld's only accept -v.
-if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
-  with_gnu_ld=yes
-else
-  with_gnu_ld=no
-fi
-echo "$ac_t$with_gnu_ld" 1>&6
-
-# See if the linker supports building shared libraries.
-echo $ac_n "checking whether the linker ($LD) supports shared libraries... $ac_c" 1>&6
-
-allow_undefined_flag=
-no_undefined_flag=
-archive_cmds=
-old_archive_from_new_cmds=
-export_dynamic_flag_spec=
-hardcode_libdir_flag_spec=
-hardcode_libdir_separator=
-hardcode_direct=no
-hardcode_minus_L=no
-hardcode_shlibpath_var=unsupported
-runpath_var=
-
-case "$host_os" in
-amigaos* | sunos4*)
-  # On these operating systems, we should treat GNU ld like the system ld.
-  gnu_ld_acts_native=yes
-  ;;
-*)
-  gnu_ld_acts_native=no
-  ;;
-esac
-
-ld_shlibs=yes
-if test "$with_gnu_ld" = yes && test "$gnu_ld_acts_native" != yes; then
-
-  # See if GNU ld supports shared libraries.
-  if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
-    archive_cmds='$CC -shared ${wl}-soname $wl$soname -o $lib$libobjs'
-    runpath_var=LD_RUN_PATH
-    ld_shlibs=yes
-  else
-    ld_shlibs=no
-  fi
-
-  if test "$ld_shlibs" = yes; then
-    hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir'
-    export_dynamic_flag_spec='${wl}--export-dynamic'
-  fi
-else
-  # PORTME fill in a description of your system's linker (not GNU ld)
-  case "$host_os" in
-  aix3*)
-    allow_undefined_flag=unsupported
-    archive_cmds='$NM$libobjs | $global_symbol_pipe | sed '\''s/.* //'\'' > $lib.exp;$LD -o $objdir/$soname$libobjs -bE:$lib.exp -T512 -H512 -bM:SRE;$AR cru $lib $objdir/$soname'
-    # Note: this linker hardcodes the directories in LIBPATH if there
-    # are no directories specified by -L.
-    hardcode_minus_L=yes
-    if test "$with_gcc" = yes && test -z "$link_static_flag"; then
-      # Neither direct hardcoding nor static linking is supported with a
-      # broken collect2.
-      hardcode_direct=unsupported
-    fi
-    ;;
-
-  aix4*)
-    allow_undefined_flag=unsupported
-    archive_cmds='$NM$libobjs | $global_symbol_pipe | sed '\''s/.* //'\'' > $lib.exp;$CC -o $objdir/$soname$libobjs ${wl}-bE:$lib.exp ${wl}-bM:SRE ${wl}-bnoentry;$AR cru $lib $objdir/$soname'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    ;;
-
-  amigaos*)
-    archive_cmds='$rm $objdir/a2ixlibrary.data;$echo "#define NAME $libname" > $objdir/a2ixlibrary.data;$echo "#define LIBRARY_ID 1" >> $objdir/a2ixlibrary.data;$echo "#define VERSION $major" >> $objdir/a2ixlibrary.data;$echo "#define REVISION $revision" >> $objdir/a2ixlibrary.data;$AR cru $lib$libobjs;$RANLIB $lib;(cd $objdir && a2ixlibrary -32)'
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_minus_L=yes
-    ;;
-
-  # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
-  # support.  Future versions do this automatically, but an explicit c++rt0.o
-  # does not break anything, and helps significantly (at the cost of a little
-  # extra space).
-  freebsd2.2*)
-    archive_cmds='$LD -Bshareable -o $lib$libobjs /usr/lib/c++rt0.o'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  # Unfortunately, older versions of FreeBSD 2 do not have this feature.
-  freebsd2*)
-    archive_cmds='$LD -Bshareable -o $lib$libobjs'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  # FreeBSD 3, at last, uses gcc -shared to do shared libraries.
-  freebsd3*)
-    archive_cmds='$CC -shared -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  hpux9*)
-    archive_cmds='$rm $objdir/$soname;$LD -b +s +b $install_libdir -o $objdir/$soname$libobjs;mv $objdir/$soname $lib'
-    hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    export_dynamic_flag_spec='${wl}-E'
-    ;;
-
-  hpux10*)
-    archive_cmds='$LD -b +h $soname +s +b $install_libdir -o $lib$libobjs'
-    hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    export_dynamic_flag_spec='${wl}-E'
-    ;;
-
-  irix5* | irix6*)
-    archive_cmds='$LD -shared -o $lib -soname $soname -set_version $verstring$libobjs'
-    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
-    ;;
-
-  netbsd*)
-    # Tested with NetBSD 1.2 ld
-    archive_cmds='$LD -Bshareable -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  openbsd*)
-    archive_cmds='$LD -Bshareable -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  os2*)
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_minus_L=yes
-    allow_undefined_flag=unsupported
-    archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $objdir/$libname.def;$echo "DESCRIPTION \"$libname\"" >> $objdir/$libname.def;$echo DATA >> $objdir/$libname.def;$echo " SINGLE NONSHARED" >> $objdir/$libname.def;$echo EXPORTS >> $objdir/$libname.def;emxexp$libobjs >> $objdir/$libname.def;$CC -Zdll -Zcrtdll -o $lib$libobjs $objdir/$libname.def'
-    old_archive_from_new_cmds='emximp -o $objdir/$libname.a $objdir/$libname.def'
-    ;;
-
-  osf3* | osf4*)
-    allow_undefined_flag=' -expect_unresolved \*'
-    archive_cmds='$LD -shared${allow_undefined_flag} -o $lib -soname $soname -set_version $verstring$libobjs$deplibs'
-    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
-    hardcode_libdir_separator=:
-    ;;
-
-  sco3.2v5*)
-    archive_cmds='$LD -G -o $lib$libobjs'
-    hardcode_direct=yes
-    ;;
-
-  solaris2*)
-    no_undefined_flag=' -z text'
-    archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_shlibpath_var=no
-
-    # Solaris 2 before 2.5 hardcodes -L paths.
-    case "$host_os" in
-    solaris2.[0-4]*)
-      hardcode_minus_L=yes
-      ;;
-    esac
-    ;;
-
-  sunos4*)
-    if test "$with_gcc" = yes; then
-      archive_cmds='$CC -shared -o $lib$libobjs'
-    else
-      archive_cmds='$LD -assert pure-text -Bstatic -o $lib$libobjs'
-    fi
-
-    if test "$with_gnu_ld" = yes; then
-      export_dynamic_flag_spec='${wl}-export-dynamic'
-    fi
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  uts4*)
-    archive_cmds='$LD -G -h $soname -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_direct=no
-    hardcode_minus_L=no
-    hardcode_shlibpath_var=no
-    ;;
-
-  *)
-    ld_shlibs=no
-    can_build_shared=no
-    ;;
-  esac
-fi
-echo "$ac_t$ld_shlibs" 1>&6
-
-if test -z "$NM"; then
-  echo $ac_n "checking for BSD-compatible nm... $ac_c" 1>&6
-  case "$NM" in
-  /* | [A-Za-z]:\\*) ;; # Let the user override the test with a path.
-  *)
-    IFS="${IFS=        }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-    for ac_dir in /usr/ucb /usr/ccs/bin $PATH /bin; do
-      test -z "$ac_dir" && ac_dir=.
-      if test -f $ac_dir/nm; then
-        # Check to see if the nm accepts a BSD-compat flag.
-        # Adding the `sed 1q' prevents false positives on HP-UX, which says:
-        #   nm: unknown option "B" ignored
-        if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
-          NM="$ac_dir/nm -B"
-        elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
-          NM="$ac_dir/nm -p"
-       else
-          NM="$ac_dir/nm"
-       fi
-        break
-      fi
-    done
-    IFS="$ac_save_ifs"
-    test -z "$NM" && NM=nm
-    ;;
-  esac
-  echo "$ac_t$NM" 1>&6
-fi
-
-# Check for command to grab the raw symbol name followed by C symbol from nm.
-echo $ac_n "checking command to parse $NM output... $ac_c" 1>&6
-
-# These are sane defaults that work on at least a few old systems.
-# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
-
-# Character class describing NM global symbol codes.
-symcode='[BCDEGRSTU]'
-
-# Regexp to match symbols that can be accessed directly from C.
-sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
-
-# Transform the above into a raw symbol and a C symbol.
-symxfrm='\1 \1'
-
-# Define system-specific variables.
-case "$host_os" in
-aix*)
-  symcode='[BCDTU]'
-  ;;
-irix*)
-  # Cannot use undefined symbols on IRIX because inlined functions mess us up.
-  symcode='[BCDEGRST]'
-  ;;
-solaris2*)
-  symcode='[BDTU]'
-  ;;
-esac
-
-# If we're using GNU nm, then use its standard symbol codes.
-if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then
-  symcode='[ABCDGISTUW]'
-fi
-
-# Write the raw and C identifiers.
-global_symbol_pipe="sed -n -e 's/^.* $symcode $sympat$/$symxfrm/p'"
-
-# Check to see that the pipe works correctly.
-pipe_works=no
-$rm conftest*
-cat > conftest.c <<EOF
-#ifdef __cplusplus
-extern "C" {
-#endif
-char nm_test_var;
-void nm_test_func(){}
-#ifdef __cplusplus
-}
-#endif
-main(){nm_test_var='a';nm_test_func();return(0);}
-EOF
-
-echo "$progname:971: checking if global_symbol_pipe works" >&5
-if { (eval echo $progname:972: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; } && test -s conftest.o; then
-  # Now try to grab the symbols.
-  nlist=conftest.nm
-  if { echo "$progname:975: eval \"$NM conftest.o | $global_symbol_pipe > $nlist\"" >&5; eval "$NM conftest.o | $global_symbol_pipe > $nlist 2>&5"; } && test -s "$nlist"; then
-
-    # Try sorting and uniquifying the output.
-    if sort "$nlist" | uniq > "$nlist"T; then
-      mv -f "$nlist"T "$nlist"
-      wcout=`wc "$nlist" 2>/dev/null`
-      count=`$echo "X$wcout" | $Xsed -e 's/^[  ]*\([0-9][0-9]*\).*$/\1/'`
-      (test "$count" -ge 0) 2>/dev/null || count=-1
-    else
-      rm -f "$nlist"T
-      count=-1
-    fi
-
-    # Make sure that we snagged all the symbols we need.
-    if egrep ' nm_test_var$' "$nlist" >/dev/null; then
-      if egrep ' nm_test_func$' "$nlist" >/dev/null; then
-       cat <<EOF > conftest.c
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-EOF
-        # Now generate the symbol file.
-        sed 's/^.* \(.*\)$/extern char \1;/' < "$nlist" >> conftest.c
-
-       cat <<EOF >> conftest.c
-#if defined (__STDC__) && __STDC__
-# define __ptr_t void *
-#else
-# define __ptr_t char *
-#endif
-
-/* The number of symbols in dld_preloaded_symbols, -1 if unsorted. */
-int dld_preloaded_symbol_count = $count;
-
-/* The mapping between symbol names and symbols. */
-struct {
-  char *name;
-  __ptr_t address;
-}
-dld_preloaded_symbols[] =
-{
-EOF
-        sed 's/^\(.*\) \(.*\)$/  {"\1", (__ptr_t) \&\2},/' < "$nlist" >> conftest.c
-        cat <<\EOF >> conftest.c
-  {0, (__ptr_t) 0}
-};
-
-#ifdef __cplusplus
-}
-#endif
-EOF
-        # Now try linking the two files.
-        mv conftest.o conftestm.o
-       save_LIBS="$LIBS"
-       save_CFLAGS="$CFLAGS"
-        LIBS='conftestm.o'
-       CFLAGS="$CFLAGS$no_builtin_flag"
-        if { (eval echo $progname:1033: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-          pipe_works=yes
-        else
-          echo "$progname: failed program was:" >&5
-          cat conftest.c >&5
-        fi
-        LIBS="$save_LIBS"
-      else
-        echo "cannot find nm_test_func in $nlist" >&5
-      fi
-    else
-      echo "cannot find nm_test_var in $nlist" >&5
-    fi
-  else
-    echo "cannot run $global_symbol_pipe" >&5
-  fi
-else
-  echo "$progname: failed program was:" >&5
-  cat conftest.c >&5
-fi
-$rm conftest*
-
-# Do not use the global_symbol_pipe unless it works.
-echo "$ac_t$pipe_works" 1>&6
-test "$pipe_works" = yes || global_symbol_pipe=
-
-# Check hardcoding attributes.
-echo $ac_n "checking how to hardcode library paths into programs... $ac_c" 1>&6
-hardcode_action=
-if test -n "$hardcode_libdir_flag_spec" || \
-   test -n "$runpath_var"; then
-
-  # We can hardcode non-existant directories.
-  if test "$hardcode_direct" != no && \
-     test "$hardcode_minus_L" != no && \
-     test "$hardcode_shlibpath_var" != no; then
-
-    # Linking always hardcodes the temporary library directory.
-    hardcode_action=relink
-  else
-    # We can link without hardcoding, and we can hardcode nonexisting dirs.
-    hardcode_action=immediate
-  fi
-elif test "$hardcode_direct" != yes && \
-     test "$hardcode_minus_L" != yes && \
-     test "$hardcode_shlibpath_var" != yes; then
-  # We cannot hardcode anything.
-  hardcode_action=unsupported
-else
-  # We can only hardcode existing directories.
-  hardcode_action=relink
-fi
-echo "$ac_t$hardcode_action" 1>&6
-test "$hardcode_action" = unsupported && can_build_shared=no
-
-
-reload_flag=
-reload_cmds='$LD$reload_flag -o $output$reload_objs'
-echo $ac_n "checking for $LD option to reload object files... $ac_c" 1>&6
-# PORTME Some linker may need a different reload flag.
-reload_flag='-r'
-echo "$ac_t$reload_flag"
-test -n "$reload_flag" && reload_flag=" $reload_flag"
-
-# PORTME Fill in your ld.so characteristics
-library_names_spec=
-libname_spec='lib$name'
-soname_spec=
-postinstall_cmds=
-postuninstall_cmds=
-finish_cmds=
-finish_eval=
-shlibpath_var=
-version_type=none
-dynamic_linker="$host_os ld.so"
-
-echo $ac_n "checking dynamic linker characteristics... $ac_c" 1>&6
-case "$host_os" in
-aix3* | aix4*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix $libname.a'
-  shlibpath_var=LIBPATH
-
-  # AIX has no versioning support, so we append a major version to the name.
-  soname_spec='${libname}${release}.so.$major'
-  ;;
-
-amigaos*)
-  library_names_spec='$libname.ixlibrary $libname.a'
-  # Create ${libname}_ixlibrary.a entries in /sys/libs.
-  finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
-  ;;
-
-freebsd2* | freebsd3*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix $libname.so'
-  finish_cmds='PATH="$PATH:/sbin" ldconfig -m $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-gnu*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-hpux9* | hpux10*)
-  # Give a soname corresponding to the major version so that dld.sl refuses to
-  # link against other versions.
-  dynamic_linker="$host_os dld.sl"
-  version_type=sunos
-  shlibpath_var=SHLIB_PATH
-  library_names_spec='${libname}${release}.sl.$versuffix ${libname}${release}.sl.$major $libname.sl'
-  soname_spec='${libname}${release}.sl.$major'
-  # HP-UX runs *really* slowly unless shared libraries are mode 555.
-  postinstall_cmds='chmod 555 $lib'
-  ;;
-
-irix5* | irix6*)
-  version_type=osf
-  soname_spec='${libname}${release}.so'
-  library_names_spec='${libname}${release}.so.$versuffix $libname.so'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-# No shared lib support for Linux oldld, aout, or coff.
-linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*)
-  dynamic_linker=no
-  ;;
-
-# This must be Linux ELF.
-linux-gnu*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  finish_cmds='PATH="$PATH:/sbin" ldconfig -n $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-
-  if test -f /lib/ld.so.1; then
-    dynamic_linker='GNU ld.so'
-  else
-    # Only the GNU ld.so supports shared libraries on MkLinux.
-    case "$host_cpu" in
-    powerpc*) dynamic_linker=no ;;
-    *) dynamic_linker='Linux ld.so' ;;
-    esac
-  fi
-  ;;
-
-netbsd* | openbsd*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix'
-  finish_cmds='PATH="$PATH:/sbin" ldconfig -m $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-os2*)
-  libname_spec='$name'
-  library_names_spec='$libname.dll $libname.a'
-  dynamic_linker='OS/2 ld.exe'
-  shlibpath_var=LIBPATH
-  ;;
-
-osf3* | osf4*)
-  version_type=osf
-  soname_spec='${libname}${release}.so'
-  library_names_spec='${libname}${release}.so.$versuffix $libname.so'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-sco3.2v5*)
-  version_type=osf
-  soname_spec='${libname}${release}.so.$major'
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-solaris2*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-sunos4*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix'
-  finish_cmds='PATH="$PATH:/usr/etc" ldconfig $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-sysv4.2uw2*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-uts4*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-*)
-  dynamic_linker=no
-  ;;
-esac
-echo "$ac_t$dynamic_linker"
-test "$dynamic_linker" = no && can_build_shared=no
-
-# Report the final consequences.
-echo "checking if libtool supports shared libraries... $can_build_shared" 1>&6
-
-echo $ac_n "checking whether to build shared libraries... $ac_c" 1>&6
-test "$can_build_shared" = "no" && enable_shared=no
-
-# On AIX, shared libraries and static libraries use the same namespace, and
-# are all built from PIC.
-case "$host_os" in
-aix*)
-  test "$enable_shared" = yes && enable_static=no
-  if test -n "$RANLIB"; then
-    archive_cmds="$archive_cmds;\$RANLIB \$lib"
-    postinstall_cmds='$RANLIB $lib'
-  fi
-  ;;
-esac
-
-echo "$ac_t$enable_shared" 1>&6
-
-# Make sure either enable_shared or enable_static is yes.
-test "$enable_shared" = yes || enable_static=yes
-
-echo "checking whether to build static libraries... $enable_static" 1>&6
-
-echo $ac_n "checking for objdir... $ac_c" 1>&6
-rm -f .libs 2>/dev/null
-mkdir .libs 2>/dev/null
-if test -d .libs; then
-  objdir=.libs
-else
-  # MS-DOS does not allow filenames that begin with a dot.
-  objdir=_libs
-fi
-rmdir .libs 2>/dev/null
-echo "$ac_t$objdir" 1>&6
-
-# Copy echo and quote the copy, instead of the original, because it is
-# used later.
-ltecho="$echo"
-
-# Now quote all the things that may contain metacharacters.
-for var in ltecho old_CC old_CFLAGS old_CPPFLAGS old_LD old_NM old_RANLIB \
-  old_LN_S AR CC LD LN_S NM reload_flag reload_cmds wl pic_flag \
-  link_static_flag no_builtin_flag export_dynamic_flag_spec \
-  libname_spec library_names_spec soname_spec RANLIB \
-  old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \
-  old_postuninstall_cmds archive_cmds postinstall_cmds postuninstall_cmds \
-  allow_undefined_flag no_undefined_flag \
-  finish_cmds finish_eval global_symbol_pipe \
-  hardcode_libdir_flag_spec hardcode_libdir_separator; do
-
-  case "$var" in
-  reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \
-  old_postinstall_cmds | old_postuninstall_cmds | archive_cmds | \
-  postinstall_cmds | postuninstall_cmds | finish_cmds)
-    # Double-quote double-evaled strings.
-    eval "$var=\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\"\`"
-    ;;
-  *)
-    eval "$var=\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`"
-    ;;
-  esac
-done
-
-ofile=libtool
-trap "$rm $ofile; exit 1" 1 2 15
-echo creating $ofile
-$rm $ofile
-cat <<EOF > $ofile
-#! /bin/sh
-
-# libtool - Provide generalized library-building support services.
-# Generated automatically by $PROGRAM - GNU $PACKAGE $VERSION
-# NOTE: Changes made to this file will be lost: look at ltconfig or ltmain.sh.
-#
-# Copyright (C) 1996-1998 Free Software Foundation, Inc.
-# Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# This program was configured as follows,
-# on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
-#
-# CC="$old_CC" CFLAGS="$old_CFLAGS" CPPFLAGS="$old_CPPFLAGS" \\
-# LD="$old_LD" NM="$old_NM" RANLIB="$old_RANLIB" LN_S="$old_LN_S" \\
-#   $0$ltconfig_args
-#
-# Compiler and other test output produced by $progname, useful for
-# debugging $progname, is in ./config.log if it exists.
-
-# Sed that helps us avoid accidentally triggering echo(1) options like -n.
-Xsed="sed -e s/^X//"
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-if test "\${CDPATH+set}" = set; then CDPATH=; export CDPATH; fi
-
-# An echo program that does not interpret backslashes.
-echo="$ltecho"
-
-# The version of $progname that generated this script.
-LTCONFIG_VERSION="$VERSION"
-
-# Shell to use when invoking shell scripts.
-SHELL=${CONFIG_SHELL-/bin/sh}
-
-# Whether or not to build libtool libraries.
-build_libtool_libs=$enable_shared
-
-# Whether or not to build old-style libraries.
-build_old_libs=$enable_static
-
-# The host system.
-host_alias="$host_alias"
-host="$host"
-
-# The archiver.
-AR="$AR"
-
-# The default C compiler.
-CC="$CC"
-
-# The linker used to build libraries.
-LD="$LD"
-
-# Whether we need hard or soft links.
-LN_S="$LN_S"
-
-# A BSD-compatible nm program.
-NM="$NM"
-
-# The name of the directory that contains temporary libtool files.
-objdir="$objdir"
-
-# How to create reloadable object files.
-reload_flag="$reload_flag"
-reload_cmds="$reload_cmds"
-
-# How to pass a linker flag through the compiler.
-wl="$wl"
-
-# Additional compiler flags for building library objects.
-pic_flag="$pic_flag"
-
-# Compiler flag to prevent dynamic linking.
-link_static_flag="$link_static_flag"
-
-# Compiler flag to turn off builtin functions.
-no_builtin_flag="$no_builtin_flag"
-
-# Compiler flag to allow reflexive dlopens.
-export_dynamic_flag_spec="$export_dynamic_flag_spec"
-
-# Library versioning type.
-version_type=$version_type
-
-# Format of library name prefix.
-libname_spec="$libname_spec"
-
-# List of archive names.  First name is the real one, the rest are links.
-# The last name is the one that the linker finds with -lNAME.
-library_names_spec="$library_names_spec"
-
-# The coded name of the library, if different from the real name.
-soname_spec="$soname_spec"
-
-# Commands used to build and install an old-style archive.
-RANLIB="$RANLIB"
-old_archive_cmds="$old_archive_cmds"
-old_postinstall_cmds="$old_postinstall_cmds"
-old_postuninstall_cmds="$old_postuninstall_cmds"
-
-# Create an old-style archive from a shared archive.
-old_archive_from_new_cmds="$old_archive_from_new_cmds"
-
-# Commands used to build and install a shared archive.
-archive_cmds="$archive_cmds"
-postinstall_cmds="$postinstall_cmds"
-postuninstall_cmds="$postuninstall_cmds"
-
-# Flag that allows shared libraries with undefined symbols to be built.
-allow_undefined_flag="$allow_undefined_flag"
-
-# Flag that forces no undefined symbols.
-no_undefined_flag="$no_undefined_flag"
-
-# Commands used to finish a libtool library installation in a directory.
-finish_cmds="$finish_cmds"
-
-# Same as above, but a single script fragment to be evaled but not shown.
-finish_eval="$finish_eval"
-
-# Take the output of nm and produce a listing of raw symbols and C names.
-global_symbol_pipe="$global_symbol_pipe"
-
-# This is the shared library runtime path variable.
-runpath_var=$runpath_var
-
-# This is the shared library path variable.
-shlibpath_var=$shlibpath_var
-
-# How to hardcode a shared library path into an executable.
-hardcode_action=$hardcode_action
-
-# Flag to hardcode \$libdir into a binary during linking.
-# This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec="$hardcode_libdir_flag_spec"
-
-# Whether we need a single -rpath flag with a separated argument.
-hardcode_libdir_separator="$hardcode_libdir_separator"
-
-# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
-# resulting binary.
-hardcode_direct=$hardcode_direct
-
-# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
-# resulting binary.
-hardcode_minus_L=$hardcode_minus_L
-
-# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
-# the resulting binary.
-hardcode_shlibpath_var=$hardcode_shlibpath_var
-
-EOF
-
-case "$host_os" in
-aix3*)
-  cat <<\EOF >> $ofile
-# AIX sometimes has problems with the GCC collect2 program.  For some
-# reason, if we set the COLLECT_NAMES environment variable, the problems
-# vanish in a puff of smoke.
-if test "${COLLECT_NAMES+set}" != set; then
-  COLLECT_NAMES=
-  export COLLECT_NAMES
-fi
-
-EOF
-  ;;
-esac
-
-# Append the ltmain.sh script.
-cat "$ltmain" >> $ofile || (rm -f $ofile; exit 1)
-
-chmod +x $ofile
-exit 0
-
-# Local Variables:
-# mode:shell-script
-# sh-indentation:2
-# End:
index e9350b3fab0c4d1c26d3120d0caf7b823dcd0a5a..0dbca1e48ed140558b92bf689878acdf3063bef6 100644 (file)
--- a/ltmain.sh
+++ b/ltmain.sh
@@ -1,8 +1,9 @@
 # ltmain.sh - Provide generalized library-building support services.
-# NOTE: Changing this file will not affect anything until you rerun ltconfig.
+# NOTE: Changing this file will not affect anything until you rerun configure.
 #
-# Copyright (C) 1996-1998 Free Software Foundation, Inc.
-# Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
+# Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # configuration script generated by Autoconf, you may include it under
 # the same distribution terms that you use for the rest of that program.
 
+# Check that we have a working $echo.
+if test "X$1" = X--no-reexec; then
+  # Discard the --no-reexec flag, and continue.
+  shift
+elif test "X$1" = X--fallback-echo; then
+  # Avoid inline document here, it may be left over
+  :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+  # Yippee, $echo works!
+  :
+else
+  # Restart under the correct shell, and then maybe $echo will work.
+  exec $SHELL "$0" --no-reexec ${1+"$@"}
+fi
+
+if test "X$1" = X--fallback-echo; then
+  # used as fallback echo
+  shift
+  cat <<EOF
+$*
+EOF
+  exit 0
+fi
+
+# define SED for historic ltconfig's generated by Libtool 1.3
+test -z "$SED" && SED=sed
+
 # The name of this program.
-progname=`$echo "$0" | sed 's%^.*/%%'`
+progname=`$echo "$0" | ${SED} 's%^.*/%%'`
 modename="$progname"
 
 # Constants.
 PROGRAM=ltmain.sh
 PACKAGE=libtool
-VERSION=1.2
+VERSION=1.4.3
+TIMESTAMP=" (1.922.2.110 2002/10/23 01:39:54)"
 
 default_mode=
 help="Try \`$progname --help' for more information."
@@ -41,21 +70,34 @@ rm="rm -f"
 
 # Sed substitution that helps us do robust quoting.  It backslashifies
 # metacharacters that are still active within double-quoted strings.
-Xsed='sed -e s/^X//'
+Xsed="${SED}"' -e 1s/^X//'
 sed_quote_subst='s/\([\\`\\"$\\\\]\)/\\\1/g'
+# test EBCDIC or ASCII                                                         
+case `echo A|od -x` in                                                         
+ *[Cc]1*) # EBCDIC based system                                                
+  SP2NL="tr '\100' '\n'"                                                       
+  NL2SP="tr '\r\n' '\100\100'"                                                 
+  ;;                                                                           
+ *) # Assume ASCII based system                                                
+  SP2NL="tr '\040' '\012'"                                                     
+  NL2SP="tr '\015\012' '\040\040'"                                             
+  ;;                                                                           
+esac                                                                           
 
 # NLS nuisances.
 # Only set LANG and LC_ALL to C if already set.
 # These must not be set unconditionally because not all systems understand
 # e.g. LANG=C (notably SCO).
-if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
-if test "${LANG+set}"   = set; then LANG=C;   export LANG;   fi
-
-if test "$LTCONFIG_VERSION" != "$VERSION"; then
-  echo "$modename: ltconfig version \`$LTCONFIG_VERSION' does not match $PROGRAM version \`$VERSION'" 1>&2
-  echo "Fatal configuration error.  See the $PACKAGE docs for more information." 1>&2
-  exit 1
+# We save the old values to restore during execute mode.
+if test "${LC_ALL+set}" = set; then
+  save_LC_ALL="$LC_ALL"; LC_ALL=C; export LC_ALL
 fi
+if test "${LANG+set}" = set; then
+  save_LANG="$LANG"; LANG=C; export LANG
+fi
+
+# Make sure IFS has a sensible default
+: ${IFS="      "}
 
 if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
   echo "$modename: not configured to build any kind of library" 1>&2
@@ -72,6 +114,8 @@ run=
 show="$echo"
 show_help=
 execute_dlfiles=
+lo2o="s/\\.lo\$/.${objext}/"
+o2lo="s/\\.${objext}\$/.lo/"
 
 # Parse our command line options once, thoroughly.
 while test $# -gt 0
@@ -79,16 +123,16 @@ do
   arg="$1"
   shift
 
-  case "$arg" in
+  case $arg in
   -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;;
   *) optarg= ;;
   esac
 
   # If the previous option needs an argument, assign it.
   if test -n "$prev"; then
-    case "$prev" in
+    case $prev in
     execute_dlfiles)
-      eval "$prev=\"\$$prev \$arg\""
+      execute_dlfiles="$execute_dlfiles $arg"
       ;;
     *)
       eval "$prev=\$arg"
@@ -101,16 +145,26 @@ do
   fi
 
   # Have we seen a non-optional argument yet?
-  case "$arg" in
+  case $arg in
   --help)
     show_help=yes
     ;;
 
   --version)
-    echo "$PROGRAM (GNU $PACKAGE) $VERSION"
+    echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP"
+    exit 0
+    ;;
+
+  --config)
+    ${SED} -e '1,/^# ### BEGIN LIBTOOL CONFIG/d' -e '/^# ### END LIBTOOL CONFIG/,$d' $0
     exit 0
     ;;
 
+  --debug)
+    echo "$progname: enabling shell trace mode"
+    set -x
+    ;;
+
   --dry-run | -n)
     run=:
     ;;
@@ -135,6 +189,8 @@ do
   --mode) prevopt="--mode" prev=mode ;;
   --mode=*) mode="$optarg" ;;
 
+  --preserve-dup-deps) duplicate_deps="yes" ;;
+
   --quiet | --silent)
     show=:
     ;;
@@ -163,24 +219,29 @@ if test -n "$prevopt"; then
   exit 1
 fi
 
+# If this variable is set in any of the actions, the command in it
+# will be execed at the end.  This prevents here-documents from being
+# left over by shells.
+exec_cmd=
+
 if test -z "$show_help"; then
 
   # Infer the operation mode.
   if test -z "$mode"; then
-    case "$nonopt" in
-    *cc | *++ | gcc* | *-gcc*)
+    case $nonopt in
+    *cc | *++ | gcc* | *-gcc* | xlc*)
       mode=link
       for arg
       do
-        case "$arg" in
-        -c)
-           mode=compile
-           break
-           ;;
-        esac
+       case $arg in
+       -c)
+          mode=compile
+          break
+          ;;
+       esac
       done
       ;;
-    *db | *dbx)
+    *db | *dbx | *strace | *truss)
       mode=execute
       ;;
     *install*|cp|mv)
@@ -195,11 +256,11 @@ if test -z "$show_help"; then
 
       # Just use the default operation mode.
       if test -z "$mode"; then
-        if test -n "$nonopt"; then
-          $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2
-        else
-          $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2
-        fi
+       if test -n "$nonopt"; then
+         $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2
+       else
+         $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2
+       fi
       fi
       ;;
     esac
@@ -217,31 +278,118 @@ if test -z "$show_help"; then
   help="Try \`$modename --help --mode=$mode' for more information."
 
   # These modes are in order of execution frequency so that they run quickly.
-  case "$mode" in
+  case $mode in
   # libtool compile mode
   compile)
     modename="$modename: compile"
     # Get the compilation command and the source file.
     base_compile=
+    prev=
     lastarg=
     srcfile="$nonopt"
     suppress_output=
 
+    user_target=no
     for arg
     do
+      case $prev in
+      "") ;;
+      xcompiler)
+       # Aesthetically quote the previous argument.
+       prev=
+       lastarg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+
+       case $arg in
+       # Double-quote args containing other shell metacharacters.
+       # Many Bourne shells cannot handle close brackets correctly
+       # in scan sets, so we specify it separately.
+       *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \   ]*|*]*|"")
+         arg="\"$arg\""
+         ;;
+       esac
+
+       # Add the previous argument to base_compile.
+       if test -z "$base_compile"; then
+         base_compile="$lastarg"
+       else
+         base_compile="$base_compile $lastarg"
+       fi
+       continue
+       ;;
+      esac
+
       # Accept any command-line options.
-      case "$arg" in
+      case $arg in
       -o)
-       $echo "$modename: you cannot specify the output filename with \`-o'" 1>&2
-       $echo "$help" 1>&2
-       exit 1
+       if test "$user_target" != "no"; then
+         $echo "$modename: you cannot specify \`-o' more than once" 1>&2
+         exit 1
+       fi
+       user_target=next
        ;;
 
       -static)
-       build_libtool_libs=no
        build_old_libs=yes
        continue
        ;;
+
+      -prefer-pic)
+       pic_mode=yes
+       continue
+       ;;
+
+      -prefer-non-pic)
+       pic_mode=no
+       continue
+       ;;
+
+      -Xcompiler)
+       prev=xcompiler
+       continue
+       ;;
+
+      -Wc,*)
+       args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"`
+       lastarg=
+       save_ifs="$IFS"; IFS=','
+       for arg in $args; do
+         IFS="$save_ifs"
+
+         # Double-quote args containing other shell metacharacters.
+         # Many Bourne shells cannot handle close brackets correctly
+         # in scan sets, so we specify it separately.
+         case $arg in
+           *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \       ]*|*]*|"")
+           arg="\"$arg\""
+           ;;
+         esac
+         lastarg="$lastarg $arg"
+       done
+       IFS="$save_ifs"
+       lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"`
+
+       # Add the arguments to base_compile.
+       if test -z "$base_compile"; then
+         base_compile="$lastarg"
+       else
+         base_compile="$base_compile $lastarg"
+       fi
+       continue
+       ;;
+      esac
+
+      case $user_target in
+      next)
+       # The next one is the -o target name
+       user_target=yes
+       continue
+       ;;
+      yes)
+       # We got the output file
+       user_target=set
+       libobj="$arg"
+       continue
+       ;;
       esac
 
       # Accept the current argument as the source file.
@@ -256,10 +404,10 @@ if test -z "$show_help"; then
       lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"`
 
       # Double-quote args containing other shell metacharacters.
-      # Many Bourne shells cannot handle close brackets correctly in scan
-      # sets, so we specify it separately.
-      case "$lastarg" in
-      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \    ]*|*]*)
+      # Many Bourne shells cannot handle close brackets correctly
+      # in scan sets, so we specify it separately.
+      case $lastarg in
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \    ]*|*]*|"")
        lastarg="\"$lastarg\""
        ;;
       esac
@@ -272,12 +420,23 @@ if test -z "$show_help"; then
       fi
     done
 
-    # Get the name of the library object.
-    libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'`
+    case $user_target in
+    set)
+      ;;
+    no)
+      # Get the name of the library object.
+      libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'`
+      ;;
+    *)
+      $echo "$modename: you must specify a target with \`-o'" 1>&2
+      exit 1
+      ;;
+    esac
 
     # Recognize several different file suffixes.
-    xform='[cCFSfms]'
-    case "$libobj" in
+    # If the user specifies -o file.o, it is replaced with file.lo
+    xform='[cCFSfmso]'
+    case $libobj in
     *.ada) xform=ada ;;
     *.adb) xform=adb ;;
     *.ads) xform=ads ;;
@@ -292,10 +451,10 @@ if test -z "$show_help"; then
 
     libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"`
 
-    case "$libobj" in
-    *.lo) obj=`$echo "X$libobj" | $Xsed -e 's/\.lo$/.o/'` ;;
+    case $libobj in
+    *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;;
     *)
-      $echo "$modename: cannot determine name of library object from \`$srcfile'" 1>&2
+      $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2
       exit 1
       ;;
     esac
@@ -308,11 +467,65 @@ if test -z "$show_help"; then
 
     # Delete any leftover library objects.
     if test "$build_old_libs" = yes; then
-      $run $rm $obj $libobj
-      trap "$run $rm $obj $libobj; exit 1" 1 2 15
+      removelist="$obj $libobj"
+    else
+      removelist="$libobj"
+    fi
+
+    $run $rm $removelist
+    trap "$run $rm $removelist; exit 1" 1 2 15
+
+    # On Cygwin there's no "real" PIC flag so we must build both object types
+    case $host_os in
+    cygwin* | mingw* | pw32* | os2*)
+      pic_mode=default
+      ;;
+    esac
+    if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then
+      # non-PIC code in shared libraries is not supported
+      pic_mode=default
+    fi
+
+    # Calculate the filename of the output object if compiler does
+    # not support -o with -c
+    if test "$compiler_c_o" = no; then
+      output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext}
+      lockfile="$output_obj.lock"
+      removelist="$removelist $output_obj $lockfile"
+      trap "$run $rm $removelist; exit 1" 1 2 15
     else
-      $run $rm $libobj
-      trap "$run $rm $libobj; exit 1" 1 2 15
+      need_locks=no
+      lockfile=
+    fi
+
+    # Lock this critical section if it is needed
+    # We use this script file to make the link, it avoids creating a new file
+    if test "$need_locks" = yes; then
+      until $run ln "$0" "$lockfile" 2>/dev/null; do
+       $show "Waiting for $lockfile to be removed"
+       sleep 2
+      done
+    elif test "$need_locks" = warn; then
+      if test -f "$lockfile"; then
+       echo "\
+*** ERROR, $lockfile exists and contains:
+`cat $lockfile 2>/dev/null`
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+       $run $rm $removelist
+       exit 1
+      fi
+      echo $srcfile > "$lockfile"
+    fi
+
+    if test -n "$fix_srcfile_path"; then
+      eval srcfile=\"$fix_srcfile_path\"
     fi
 
     # Only build a PIC object if we are building libtool libraries.
@@ -320,24 +533,121 @@ if test -z "$show_help"; then
       # Without this assignment, base_compile gets emptied.
       fbsd_hideous_sh_bug=$base_compile
 
-      # All platforms use -DPIC, to notify preprocessed assembler code.
-      $show "$base_compile$pic_flag -DPIC $srcfile"
-      if $run eval "$base_compile\$pic_flag -DPIC \$srcfile"; then :
+      if test "$pic_mode" != no; then
+       # All platforms use -DPIC, to notify preprocessed assembler code.
+       command="$base_compile $srcfile $pic_flag -DPIC"
       else
-        test -n "$obj" && $run $rm $obj
-        exit 1
+       # Don't build PIC code
+       command="$base_compile $srcfile"
       fi
+      if test "$build_old_libs" = yes; then
+       lo_libobj="$libobj"
+       dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'`
+       if test "X$dir" = "X$libobj"; then
+         dir="$objdir"
+       else
+         dir="$dir/$objdir"
+       fi
+       libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'`
 
-      # If we have no pic_flag, then copy the object into place and finish.
-      if test -z "$pic_flag"; then
-        $show "$LN_S $obj $libobj"
-        $run $LN_S $obj $libobj
-        exit $?
+       if test -d "$dir"; then
+         $show "$rm $libobj"
+         $run $rm $libobj
+       else
+         $show "$mkdir $dir"
+         $run $mkdir $dir
+         status=$?
+         if test $status -ne 0 && test ! -d $dir; then
+           exit $status
+         fi
+       fi
       fi
+      if test "$compiler_o_lo" = yes; then
+       output_obj="$libobj"
+       command="$command -o $output_obj"
+      elif test "$compiler_c_o" = yes; then
+       output_obj="$obj"
+       command="$command -o $output_obj"
+      fi
+
+      $run $rm "$output_obj"
+      $show "$command"
+      if $run eval "$command"; then :
+      else
+       test -n "$output_obj" && $run $rm $removelist
+       exit 1
+      fi
+
+      if test "$need_locks" = warn &&
+        test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then
+       echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+       $run $rm $removelist
+       exit 1
+      fi
+
+      # Just move the object if needed, then go on to compile the next one
+      if test x"$output_obj" != x"$libobj"; then
+       $show "$mv $output_obj $libobj"
+       if $run $mv $output_obj $libobj; then :
+       else
+         error=$?
+         $run $rm $removelist
+         exit $error
+       fi
+      fi
+
+      # If we have no pic_flag, then copy the object into place and finish.
+      if (test -z "$pic_flag" || test "$pic_mode" != default) &&
+        test "$build_old_libs" = yes; then
+       # Rename the .lo from within objdir to obj
+       if test -f $obj; then
+         $show $rm $obj
+         $run $rm $obj
+       fi
+
+       $show "$mv $libobj $obj"
+       if $run $mv $libobj $obj; then :
+       else
+         error=$?
+         $run $rm $removelist
+         exit $error
+       fi
 
-      # Just move the object, then go on to compile the next one
-      $show "$mv $obj $libobj"
-      $run $mv $obj $libobj || exit 1
+       xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'`
+       if test "X$xdir" = "X$obj"; then
+         xdir="."
+       else
+         xdir="$xdir"
+       fi
+       baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"`
+       libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"`
+       # Now arrange that obj and lo_libobj become the same file
+       $show "(cd $xdir && $LN_S $baseobj $libobj)"
+       if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then
+         # Unlock the critical section if it was locked
+         if test "$need_locks" != no; then
+           $run $rm "$lockfile"
+         fi
+         exit 0
+       else
+         error=$?
+         $run $rm $removelist
+         exit $error
+       fi
+      fi
 
       # Allow error messages only from the first compilation.
       suppress_output=' >/dev/null 2>&1'
@@ -345,64 +655,170 @@ if test -z "$show_help"; then
 
     # Only build a position-dependent object if we build old libraries.
     if test "$build_old_libs" = yes; then
+      if test "$pic_mode" != yes; then
+       # Don't build PIC code
+       command="$base_compile $srcfile"
+      else
+       # All platforms use -DPIC, to notify preprocessed assembler code.
+       command="$base_compile $srcfile $pic_flag -DPIC"
+      fi
+      if test "$compiler_c_o" = yes; then
+       command="$command -o $obj"
+       output_obj="$obj"
+      fi
+
       # Suppress compiler output if we already did a PIC compilation.
-      $show "$base_compile $srcfile$suppress_output"
-      if $run eval "$base_compile \$srcfile$suppress_output"; then :
+      command="$command$suppress_output"
+      $run $rm "$output_obj"
+      $show "$command"
+      if $run eval "$command"; then :
+      else
+       $run $rm $removelist
+       exit 1
+      fi
+
+      if test "$need_locks" = warn &&
+        test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then
+       echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+       $run $rm $removelist
+       exit 1
+      fi
+
+      # Just move the object if needed
+      if test x"$output_obj" != x"$obj"; then
+       $show "$mv $output_obj $obj"
+       if $run $mv $output_obj $obj; then :
+       else
+         error=$?
+         $run $rm $removelist
+         exit $error
+       fi
+      fi
+
+      # Create an invalid libtool object if no PIC, so that we do not
+      # accidentally link it into a program.
+      if test "$build_libtool_libs" != yes; then
+       $show "echo timestamp > $libobj"
+       $run eval "echo timestamp > \$libobj" || exit $?
       else
-        $run $rm $obj $libobj
-        exit 1
+       # Move the .lo from within objdir
+       $show "$mv $libobj $lo_libobj"
+       if $run $mv $libobj $lo_libobj; then :
+       else
+         error=$?
+         $run $rm $removelist
+         exit $error
+       fi
       fi
     fi
 
-    # Create an invalid libtool object if no PIC, so that we do not
-    # accidentally link it into a program.
-    if test "$build_libtool_libs" != yes; then
-      $show "echo timestamp > $libobj"
-      $run eval "echo timestamp > \$libobj" || exit $?
+    # Unlock the critical section if it was locked
+    if test "$need_locks" != no; then
+      $run $rm "$lockfile"
     fi
 
     exit 0
     ;;
 
   # libtool link mode
-  link)
+  link | relink)
     modename="$modename: link"
-    CC="$nonopt"
-    allow_undefined=yes
-    compile_command="$CC"
-    finalize_command="$CC"
+    case $host in
+    *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+      # It is impossible to link a dll without this setting, and
+      # we shouldn't force the makefile maintainer to figure out
+      # which system we are compiling for in order to pass an extra
+      # flag for every libtool invocation.
+      # allow_undefined=no
+
+      # FIXME: Unfortunately, there are problems with the above when trying
+      # to make a dll which has undefined symbols, in which case not
+      # even a static library is built.  For now, we need to specify
+      # -no-undefined on the libtool link line when we can be certain
+      # that all symbols are satisfied, otherwise we get a static library.
+      allow_undefined=yes
+      ;;
+    *)
+      allow_undefined=yes
+      ;;
+    esac
+    libtool_args="$nonopt"
+    compile_command="$nonopt"
+    finalize_command="$nonopt"
 
+    compile_rpath=
+    finalize_rpath=
     compile_shlibpath=
     finalize_shlibpath=
+    convenience=
+    old_convenience=
     deplibs=
+    old_deplibs=
+    compiler_flags=
+    linker_flags=
+    dllsearchpath=
+    lib_search_path=`pwd`
+
+    avoid_version=no
     dlfiles=
     dlprefiles=
+    dlself=no
     export_dynamic=no
-    hardcode_libdirs=
+    export_symbols=
+    export_symbols_regex=
+    generated=
     libobjs=
-    link_against_libtool_libs=
     ltlibs=
+    module=no
+    no_install=no
     objs=
+    prefer_static_libs=no
+    preload=no
     prev=
     prevarg=
     release=
     rpath=
+    xrpath=
     perm_rpath=
     temp_rpath=
+    thread_safe=no
     vinfo=
 
     # We need to know -static, to get the right output filenames.
     for arg
     do
-      case "$arg" in
+      case $arg in
       -all-static | -static)
-        if test "X$arg" = "X-all-static" && test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
+       if test "X$arg" = "X-all-static"; then
+         if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
            $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2
-        fi
-        build_libtool_libs=no
+         fi
+         if test -n "$link_static_flag"; then
+           dlopen_self=$dlopen_self_static
+         fi
+       else
+         if test -z "$pic_flag" && test -n "$link_static_flag"; then
+           dlopen_self=$dlopen_self_static
+         fi
+       fi
+       build_libtool_libs=no
        build_old_libs=yes
-        break
-        ;;
+       prefer_static_libs=yes
+       break
+       ;;
       esac
     done
 
@@ -410,55 +826,141 @@ if test -z "$show_help"; then
     test -n "$old_archive_from_new_cmds" && build_old_libs=yes
 
     # Go through the arguments, transforming them on the way.
-    for arg
-    do
+    while test $# -gt 0; do
+      arg="$1"
+      shift
+      case $arg in
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \    ]*|*]*|"")
+       qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test
+       ;;
+      *) qarg=$arg ;;
+      esac
+      libtool_args="$libtool_args $qarg"
+
       # If the previous option needs an argument, assign it.
       if test -n "$prev"; then
-        case "$prev" in
-        output)
-          compile_command="$compile_command @OUTPUT@"
-          finalize_command="$finalize_command @OUTPUT@"
-          ;;
-        esac
-
-        case "$prev" in
-        dlfiles|dlprefiles)
-          case "$arg" in
-          *.la | *.lo) ;;  # We handle these cases below.
-          *)
-            dlprefiles="$dlprefiles $arg"
-            test "$prev" = dlfiles && dlfiles="$dlfiles $arg"
-            prev=
-            ;;
-          esac
-          ;;
+       case $prev in
+       output)
+         compile_command="$compile_command @OUTPUT@"
+         finalize_command="$finalize_command @OUTPUT@"
+         ;;
+       esac
+
+       case $prev in
+       dlfiles|dlprefiles)
+         if test "$preload" = no; then
+           # Add the symbol object into the linking commands.
+           compile_command="$compile_command @SYMFILE@"
+           finalize_command="$finalize_command @SYMFILE@"
+           preload=yes
+         fi
+         case $arg in
+         *.la | *.lo) ;;  # We handle these cases below.
+         force)
+           if test "$dlself" = no; then
+             dlself=needless
+             export_dynamic=yes
+           fi
+           prev=
+           continue
+           ;;
+         self)
+           if test "$prev" = dlprefiles; then
+             dlself=yes
+           elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
+             dlself=yes
+           else
+             dlself=needless
+             export_dynamic=yes
+           fi
+           prev=
+           continue
+           ;;
+         *)
+           if test "$prev" = dlfiles; then
+             dlfiles="$dlfiles $arg"
+           else
+             dlprefiles="$dlprefiles $arg"
+           fi
+           prev=
+           continue
+           ;;
+         esac
+         ;;
+       expsyms)
+         export_symbols="$arg"
+         if test ! -f "$arg"; then
+           $echo "$modename: symbol file \`$arg' does not exist"
+           exit 1
+         fi
+         prev=
+         continue
+         ;;
+       expsyms_regex)
+         export_symbols_regex="$arg"
+         prev=
+         continue
+         ;;
        release)
          release="-$arg"
          prev=
          continue
          ;;
-        rpath)
-          rpath="$rpath $arg"
+       rpath | xrpath)
+         # We need an absolute path.
+         case $arg in
+         [\\/]* | [A-Za-z]:[\\/]*) ;;
+         *)
+           $echo "$modename: only absolute run-paths are allowed" 1>&2
+           exit 1
+           ;;
+         esac
+         if test "$prev" = rpath; then
+           case "$rpath " in
+           *" $arg "*) ;;
+           *) rpath="$rpath $arg" ;;
+           esac
+         else
+           case "$xrpath " in
+           *" $arg "*) ;;
+           *) xrpath="$xrpath $arg" ;;
+           esac
+         fi
          prev=
          continue
          ;;
-        *)
-          eval "$prev=\"\$arg\""
-          prev=
-          continue
-          ;;
-        esac
-      fi
+       xcompiler)
+         compiler_flags="$compiler_flags $qarg"
+         prev=
+         compile_command="$compile_command $qarg"
+         finalize_command="$finalize_command $qarg"
+         continue
+         ;;
+       xlinker)
+         linker_flags="$linker_flags $qarg"
+         compiler_flags="$compiler_flags $wl$qarg"
+         prev=
+         compile_command="$compile_command $wl$qarg"
+         finalize_command="$finalize_command $wl$qarg"
+         continue
+         ;;
+       *)
+         eval "$prev=\"\$arg\""
+         prev=
+         continue
+         ;;
+       esac
+      fi # test -n $prev
 
       prevarg="$arg"
 
-      case "$arg" in
+      case $arg in
       -all-static)
        if test -n "$link_static_flag"; then
-          compile_command="$compile_command $link_static_flag"
+         compile_command="$compile_command $link_static_flag"
          finalize_command="$finalize_command $link_static_flag"
-        fi
-        continue
+       fi
+       continue
        ;;
 
       -allow-undefined)
@@ -467,100 +969,258 @@ if test -z "$show_help"; then
        continue
        ;;
 
+      -avoid-version)
+       avoid_version=yes
+       continue
+       ;;
+
       -dlopen)
-        prev=dlfiles
-        continue
-        ;;
+       prev=dlfiles
+       continue
+       ;;
 
       -dlpreopen)
-        prev=dlprefiles
-        continue
-        ;;
+       prev=dlprefiles
+       continue
+       ;;
 
       -export-dynamic)
-        if test "$export_dynamic" != yes; then
-          export_dynamic=yes
-         if test -n "$export_dynamic_flag_spec"; then
-           eval arg=\"$export_dynamic_flag_spec\"
-         else
-           arg=
-         fi
-
-          # Add the symbol object into the linking commands.
-         compile_command="$compile_command @SYMFILE@"
-         finalize_command="$finalize_command @SYMFILE@"
-        fi
-        ;;
-
-      -L*)
-        dir=`$echo "X$arg" | $Xsed -e 's%^-L\(.*\)$%\1%'`
-        case "$dir" in
-        /* | [A-Za-z]:\\*)
-         # Add the corresponding hardcode_libdir_flag, if it is not identical.
-          ;;
-        *)
-          $echo "$modename: \`-L$dir' cannot specify a relative directory" 1>&2
-          exit 1
-          ;;
-        esac
-        deplibs="$deplibs $arg"
-        ;;
-
-      -l*) deplibs="$deplibs $arg" ;;
-
-      -no-undefined)
-       allow_undefined=no
+       export_dynamic=yes
        continue
        ;;
 
-      -o) prev=output ;;
-
-      -release)
-       prev=release
+      -export-symbols | -export-symbols-regex)
+       if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+         $echo "$modename: more than one -exported-symbols argument is not allowed"
+         exit 1
+       fi
+       if test "X$arg" = "X-export-symbols"; then
+         prev=expsyms
+       else
+         prev=expsyms_regex
+       fi
        continue
        ;;
 
-      -rpath)
-        prev=rpath
-        continue
-        ;;
-
-      -static)
-       # If we have no pic_flag, then this is the same as -all-static.
-       if test -z "$pic_flag" && test -n "$link_static_flag"; then
-          compile_command="$compile_command $link_static_flag"
-         finalize_command="$finalize_command $link_static_flag"
-        fi
+      # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:*
+      # so, if we see these flags be careful not to treat them like -L
+      -L[A-Z][A-Z]*:*)
+       case $with_gcc/$host in
+       no/*-*-irix* | no/*-*-nonstopux*)
+         compile_command="$compile_command $arg"
+         finalize_command="$finalize_command $arg"
+         ;;
+       esac
        continue
        ;;
 
-      -version-info)
-        prev=vinfo
-        continue
-        ;;
-
-      # Some other compiler flag.
-      -* | +*)
-       # Unknown arguments in both finalize_command and compile_command need
-       # to be aesthetically quoted because they are evaled later.
-       arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-       case "$arg" in
-       *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \   ]*|*]*)
-         arg="\"$arg\""
+      -L*)
+       dir=`$echo "X$arg" | $Xsed -e 's/^-L//'`
+       # We need an absolute path.
+       case $dir in
+       [\\/]* | [A-Za-z]:[\\/]*) ;;
+       *)
+         absdir=`cd "$dir" && pwd`
+         if test -z "$absdir"; then
+           $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2
+           exit 1
+         fi
+         dir="$absdir"
          ;;
        esac
-        ;;
-
-      *.o | *.a)
-        # A standard object.
-        objs="$objs $arg"
-        ;;
-
-      *.lo)
-        # A library object.
-       if test "$prev" = dlfiles; then
-         dlfiles="$dlfiles $arg"
-         if test "$build_libtool_libs" = yes; then
+       case "$deplibs " in
+       *" -L$dir "*) ;;
+       *)
+         deplibs="$deplibs -L$dir"
+         lib_search_path="$lib_search_path $dir"
+         ;;
+       esac
+       case $host in
+       *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+         case :$dllsearchpath: in
+         *":$dir:"*) ;;
+         *) dllsearchpath="$dllsearchpath:$dir";;
+         esac
+         ;;
+       esac
+       continue
+       ;;
+
+      -l*)
+       if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then
+         case $host in
+         *-*-cygwin* | *-*-pw32* | *-*-beos*)
+           # These systems don't actually have a C or math library (as such)
+           continue
+           ;;
+         *-*-mingw* | *-*-os2*)
+           # These systems don't actually have a C library (as such)
+           test "X$arg" = "X-lc" && continue
+           ;;
+         *-*-openbsd* | *-*-freebsd*)
+           # Do not include libc due to us having libc/libc_r.
+           test "X$arg" = "X-lc" && continue
+           ;;
+         esac
+        elif test "X$arg" = "X-lc_r"; then
+         case $host in
+        *-*-openbsd* | *-*-freebsd*)
+           # Do not include libc_r directly, use -pthread flag.
+           continue
+           ;;
+         esac
+       fi
+       deplibs="$deplibs $arg"
+       continue
+       ;;
+
+      -module)
+       module=yes
+       continue
+       ;;
+
+      -no-fast-install)
+       fast_install=no
+       continue
+       ;;
+
+      -no-install)
+       case $host in
+       *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+         # The PATH hackery in wrapper scripts is required on Windows
+         # in order for the loader to find any dlls it needs.
+         $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2
+         $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2
+         fast_install=no
+         ;;
+       *) no_install=yes ;;
+       esac
+       continue
+       ;;
+
+      -no-undefined)
+       allow_undefined=no
+       continue
+       ;;
+
+      -o) prev=output ;;
+
+      -release)
+       prev=release
+       continue
+       ;;
+
+      -rpath)
+       prev=rpath
+       continue
+       ;;
+
+      -R)
+       prev=xrpath
+       continue
+       ;;
+
+      -R*)
+       dir=`$echo "X$arg" | $Xsed -e 's/^-R//'`
+       # We need an absolute path.
+       case $dir in
+       [\\/]* | [A-Za-z]:[\\/]*) ;;
+       *)
+         $echo "$modename: only absolute run-paths are allowed" 1>&2
+         exit 1
+         ;;
+       esac
+       case "$xrpath " in
+       *" $dir "*) ;;
+       *) xrpath="$xrpath $dir" ;;
+       esac
+       continue
+       ;;
+
+      -static)
+       # The effects of -static are defined in a previous loop.
+       # We used to do the same as -all-static on platforms that
+       # didn't have a PIC flag, but the assumption that the effects
+       # would be equivalent was wrong.  It would break on at least
+       # Digital Unix and AIX.
+       continue
+       ;;
+
+      -thread-safe)
+       thread_safe=yes
+       continue
+       ;;
+
+      -version-info)
+       prev=vinfo
+       continue
+       ;;
+
+      -Wc,*)
+       args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'`
+       arg=
+       save_ifs="$IFS"; IFS=','
+       for flag in $args; do
+         IFS="$save_ifs"
+         case $flag in
+           *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \       ]*|*]*|"")
+           flag="\"$flag\""
+           ;;
+         esac
+         arg="$arg $wl$flag"
+         compiler_flags="$compiler_flags $flag"
+       done
+       IFS="$save_ifs"
+       arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+       ;;
+
+      -Wl,*)
+       args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'`
+       arg=
+       save_ifs="$IFS"; IFS=','
+       for flag in $args; do
+         IFS="$save_ifs"
+         case $flag in
+           *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \       ]*|*]*|"")
+           flag="\"$flag\""
+           ;;
+         esac
+         arg="$arg $wl$flag"
+         compiler_flags="$compiler_flags $wl$flag"
+         linker_flags="$linker_flags $flag"
+       done
+       IFS="$save_ifs"
+       arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+       ;;
+
+      -Xcompiler)
+       prev=xcompiler
+       continue
+       ;;
+
+      -Xlinker)
+       prev=xlinker
+       continue
+       ;;
+
+      # Some other compiler flag.
+      -* | +*)
+       # Unknown arguments in both finalize_command and compile_command need
+       # to be aesthetically quoted because they are evaled later.
+       arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+       case $arg in
+       *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \   ]*|*]*|"")
+         arg="\"$arg\""
+         ;;
+       esac
+       ;;
+
+      *.lo | *.$objext)
+       # A library or standard object.
+       if test "$prev" = dlfiles; then
+         # This file was specified with -dlopen.
+         if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+           dlfiles="$dlfiles $arg"
            prev=
            continue
          else
@@ -571,230 +1231,59 @@ if test -z "$show_help"; then
 
        if test "$prev" = dlprefiles; then
          # Preload the old-style object.
-         dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e 's/\.lo$/\.o/'`
+         dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"`
          prev=
+       else
+         case $arg in
+         *.lo) libobjs="$libobjs $arg" ;;
+         *) objs="$objs $arg" ;;
+         esac
        fi
-       libobjs="$libobjs $arg"
-        ;;
-
-      *.la)
-        # A libtool-controlled library.
-
-        dlname=
-        libdir=
-        library_names=
-        old_library=
-
-        # Check to see that this really is a libtool archive.
-        if (sed -e '2q' $arg | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then :
-        else
-          $echo "$modename: \`$arg' is not a valid libtool archive" 1>&2
-          exit 1
-        fi
-
-        # If there is no directory component, then add one.
-        case "$arg" in
-        */* | *\\*) . $arg ;;
-        *) . ./$arg ;;
-        esac
-
-        if test -z "$libdir"; then
-          $echo "$modename: \`$arg' contains no -rpath information" 1>&2
-          exit 1
-        fi
-
-        # Get the name of the library we link against.
-        linklib=
-        for l in $old_library $library_names; do
-          linklib="$l"
-        done
-
-        if test -z "$linklib"; then
-          $echo "$modename: cannot find name of link library for \`$arg'" 1>&2
-          exit 1
-        fi
-
-        # Find the relevant object directory and library name.
-        name=`$echo "X$arg" | $Xsed -e 's%^.*/%%' -e 's/\.la$//' -e 's/^lib//'`
-        dir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'`
-        if test "X$dir" = "X$arg"; then
-          dir="$objdir"
-        else
-          dir="$dir/$objdir"
-        fi
-
-        # This library was specified with -dlopen.
-        if test "$prev" = dlfiles; then
-          dlfiles="$dlfiles $arg"
-          if test -z "$dlname"; then
-            # If there is no dlname, we need to preload.
-            prev=dlprefiles
-          else
-            # We should not create a dependency on this library, but we
-           # may need any libraries it requires.
-           compile_command="$compile_command$dependency_libs"
-           finalize_command="$finalize_command$dependency_libs"
-            prev=
-            continue
-          fi
-        fi
-
-        # The library was specified with -dlpreopen.
-        if test "$prev" = dlprefiles; then
-          # Prefer using a static library (so that no silly _DYNAMIC symbols
-          # are required to link).
-          if test -n "$old_library"; then
-            dlprefiles="$dlprefiles $dir/$old_library"
-          else
-            dlprefiles="$dlprefiles $dir/$linklib"
-          fi
-          prev=
-        fi
-
-        if test "$build_libtool_libs" = yes && test -n "$library_names"; then
-          link_against_libtool_libs="$link_against_libtool_libs $arg"
-          if test -n "$shlibpath_var"; then
-            # Make sure the rpath contains only unique directories.
-            case "$temp_rpath " in
-            *" $dir "*) ;;
-            *) temp_rpath="$temp_rpath $dir" ;;
-            esac
-          fi
+       ;;
 
-         # This is the magic to use -rpath.
-          if test -n "$hardcode_libdir_flag_spec"; then
-            if test -n "$hardcode_libdir_separator"; then
-              if test -z "$hardcode_libdirs"; then
-                # Put the magic libdir with the hardcode flag.
-                hardcode_libdirs="$libdir"
-                libdir="@HARDCODE_LIBDIRS@"
-              else
-                # Just accumulate the unique libdirs.
-               case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
-               *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
-                 ;;
-               *)
-                 hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
-                 ;;
-               esac
-                libdir=
-              fi
-            fi
-
-            if test -n "$libdir"; then
-              eval flag=\"$hardcode_libdir_flag_spec\"
-
-              compile_command="$compile_command $flag"
-              finalize_command="$finalize_command $flag"
-            fi
-          elif test -n "$runpath_var"; then
-            # Do the same for the permanent run path.
-            case "$perm_rpath " in
-            *" $libdir "*) ;;
-            *) perm_rpath="$perm_rpath $libdir" ;;
-            esac
-          fi
-
-
-          case "$hardcode_action" in
-          immediate)
-            if test "$hardcode_direct" = no; then
-              compile_command="$compile_command $dir/$linklib"
-            elif test "$hardcode_minus_L" = no; then
-              compile_command="$compile_command -L$dir -l$name"
-            elif test "$hardcode_shlibpath_var" = no; then
-              compile_shlibpath="$compile_shlibpath$dir:"
-              compile_command="$compile_command -l$name"
-            fi
-            ;;
-
-          relink)
-            # We need an absolute path.
-            case "$dir" in
-            /* | [A-Za-z]:\\*) ;;
-            *)
-              absdir=`cd "$dir" && pwd`
-              if test -z "$absdir"; then
-                $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2
-                exit 1
-              fi
-              dir="$absdir"
-              ;;
-            esac
-
-            if test "$hardcode_direct" = yes; then
-              compile_command="$compile_command $dir/$linklib"
-            elif test "$hardcode_minus_L" = yes; then
-              compile_command="$compile_command -L$dir -l$name"
-            elif test "$hardcode_shlibpath_var" = yes; then
-              compile_shlibpath="$compile_shlibpath$dir:"
-              compile_command="$compile_command -l$name"
-            fi
-            ;;
-
-          *)
-            $echo "$modename: \`$hardcode_action' is an unknown hardcode action" 1>&2
-            exit 1
-            ;;
-          esac
-
-          # Finalize command for both is simple: just hardcode it.
-          if test "$hardcode_direct" = yes; then
-            finalize_command="$finalize_command $libdir/$linklib"
-          elif test "$hardcode_minus_L" = yes; then
-            finalize_command="$finalize_command -L$libdir -l$name"
-          elif test "$hardcode_shlibpath_var" = yes; then
-            finalize_shlibpath="$finalize_shlibpath$libdir:"
-            finalize_command="$finalize_command -l$name"
-          else
-            # We cannot seem to hardcode it, guess we'll fake it.
-            finalize_command="$finalize_command -L$libdir -l$name"
-          fi
-        else
-          # Transform directly to old archives if we don't build new libraries.
-          if test -n "$pic_flag" && test -z "$old_library"; then
-            $echo "$modename: cannot find static library for \`$arg'" 1>&2
-            exit 1
-          fi
+      *.$libext)
+       # An archive.
+       deplibs="$deplibs $arg"
+       old_deplibs="$old_deplibs $arg"
+       continue
+       ;;
 
-         # Here we assume that one of hardcode_direct or hardcode_minus_L
-         # is not unsupported.  This is valid on all known static and
-         # shared platforms.
-         if test "$hardcode_direct" != unsupported; then
-           test -n "$old_library" && linklib="$old_library"
-           compile_command="$compile_command $dir/$linklib"
-           finalize_command="$finalize_command $dir/$linklib"
-         else
-           compile_command="$compile_command -L$dir -l$name"
-           finalize_command="$finalize_command -L$dir -l$name"
-         fi
-        fi
+      *.la)
+       # A libtool-controlled library.
 
-       # Add in any libraries that this one depends upon.
-       compile_command="$compile_command$dependency_libs"
-       finalize_command="$finalize_command$dependency_libs"
+       if test "$prev" = dlfiles; then
+         # This library was specified with -dlopen.
+         dlfiles="$dlfiles $arg"
+         prev=
+       elif test "$prev" = dlprefiles; then
+         # The library was specified with -dlpreopen.
+         dlprefiles="$dlprefiles $arg"
+         prev=
+       else
+         deplibs="$deplibs $arg"
+       fi
        continue
-        ;;
+       ;;
 
       # Some other compiler argument.
       *)
        # Unknown arguments in both finalize_command and compile_command need
        # to be aesthetically quoted because they are evaled later.
        arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-       case "$arg" in
-       *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \   ]*|*]*)
+       case $arg in
+       *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \   ]*|*]*|"")
          arg="\"$arg\""
          ;;
        esac
-        ;;
-      esac
+       ;;
+      esac # arg
 
       # Now actually substitute the argument into the commands.
       if test -n "$arg"; then
        compile_command="$compile_command $arg"
        finalize_command="$finalize_command $arg"
       fi
-    done
+    done # argument parsing loop
 
     if test -n "$prev"; then
       $echo "$modename: the \`$prevarg' option requires an argument" 1>&2
@@ -802,722 +1291,2517 @@ if test -z "$show_help"; then
       exit 1
     fi
 
-    if test -n "$vinfo" && test -n "$release"; then
-      $echo "$modename: you cannot specify both \`-version-info' and \`-release'" 1>&2
-      $echo "$help" 1>&2
-      exit 1
+    if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
+      eval arg=\"$export_dynamic_flag_spec\"
+      compile_command="$compile_command $arg"
+      finalize_command="$finalize_command $arg"
+    fi
+
+    # calculate the name of the file, without its directory
+    outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'`
+    libobjs_save="$libobjs"
+
+    if test -n "$shlibpath_var"; then
+      # get the directories listed in $shlibpath_var
+      eval shlib_search_path=\`\$echo \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\`
+    else
+      shlib_search_path=
+    fi
+    eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
+    eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
+
+    output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'`
+    if test "X$output_objdir" = "X$output"; then
+      output_objdir="$objdir"
+    else
+      output_objdir="$output_objdir/$objdir"
+    fi
+    # Create the object directory.
+    if test ! -d $output_objdir; then
+      $show "$mkdir $output_objdir"
+      $run $mkdir $output_objdir
+      status=$?
+      if test $status -ne 0 && test ! -d $output_objdir; then
+       exit $status
+      fi
     fi
 
-    oldlib=
-    oldobjs=
-    case "$output" in
+    # Determine the type of output
+    case $output in
     "")
       $echo "$modename: you must specify an output file" 1>&2
       $echo "$help" 1>&2
       exit 1
       ;;
+    *.$libext) linkmode=oldlib ;;
+    *.lo | *.$objext) linkmode=obj ;;
+    *.la) linkmode=lib ;;
+    *) linkmode=prog ;; # Anything else should be a program.
+    esac
 
-    */* | *\\*)
-      $echo "$modename: output file \`$output' must have no directory components" 1>&2
-      exit 1
-      ;;
+    specialdeplibs=
+    libs=
+    # Find all interdependent deplibs by searching for libraries
+    # that are linked more than once (e.g. -la -lb -la)
+    for deplib in $deplibs; do
+      if test "X$duplicate_deps" = "Xyes" ; then
+       case "$libs " in
+       *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+       esac
+      fi
+      libs="$libs $deplib"
+    done
+    deplibs=
+    newdependency_libs=
+    newlib_search_path=
+    need_relink=no # whether we're linking any uninstalled libtool libraries
+    notinst_deplibs= # not-installed libtool libraries
+    notinst_path= # paths that contain not-installed libtool libraries
+    case $linkmode in # choose which passes the "for pass in $passes" loop will run
+    lib)
+       passes="conv link"
+       for file in $dlfiles $dlprefiles; do # every -dlopen/-dlpreopen argument must be a .la file
+         case $file in
+         *.la) ;;
+         *)
+           $echo "$modename: libraries can \`-dlopen' only libtool libraries: $file" 1>&2
+           exit 1
+           ;;
+         esac
+       done
+       ;;
+    prog)
+       compile_deplibs= # reset the per-program accumulators before the passes run
+       finalize_deplibs=
+       alldeplibs=no
+       newdlfiles=
+       newdlprefiles=
+       passes="conv scan dlopen dlpreopen link"
+       ;;
+    *)  passes="conv" # any other link mode only runs the conv pass
+       ;;
+    esac
+    for pass in $passes; do
+      if test $linkmode = prog; then
+       # Determine which files to process
+       case $pass in
+       dlopen)
+         libs="$dlfiles"
+         save_deplibs="$deplibs" # Collect dlpreopened libraries
+         deplibs=
+         ;;
+       dlpreopen) libs="$dlprefiles" ;;
+       link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+       esac
+      fi
+      for deplib in $libs; do
+       lib=
+       found=no
+       case $deplib in
+       -l*)
+         # -l is meaningless when the output is a static archive or a plain
+         # object: warn and drop the flag.  The two modes are alternatives, so
+         # they must be joined with ||; joining them with && could never be true.
+         if test $linkmode = oldlib || test $linkmode = obj; then
+           $echo "$modename: warning: \`-l' is ignored for archives/objects: $deplib" 1>&2
+           continue
+         fi
+         if test $pass = conv; then
+           deplibs="$deplib $deplibs"
+           continue
+         fi
+         name=`$echo "X$deplib" | $Xsed -e 's/^-l//'`
+         for searchdir in $newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path; do
+           # Search the libtool library
+           lib="$searchdir/lib${name}.la"
+           if test -f "$lib"; then
+             found=yes
+             break
+           fi
+         done
+         if test "$found" != yes; then
+           # deplib doesn't seem to be a libtool library
+           if test "$linkmode,$pass" = "prog,link"; then
+             compile_deplibs="$deplib $compile_deplibs"
+             finalize_deplibs="$deplib $finalize_deplibs"
+           else
+             deplibs="$deplib $deplibs"
+             test $linkmode = lib && newdependency_libs="$deplib $newdependency_libs"
+           fi
+           continue
+         fi
+         ;; # -l
+       -L*)
+         case $linkmode in
+         lib)
+           deplibs="$deplib $deplibs"
+           test $pass = conv && continue
+           newdependency_libs="$deplib $newdependency_libs"
+           newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+           ;;
+         prog)
+           if test $pass = conv; then
+             deplibs="$deplib $deplibs"
+             continue
+           fi
+           if test $pass = scan; then
+             deplibs="$deplib $deplibs"
+             newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+           else
+             compile_deplibs="$deplib $compile_deplibs"
+             finalize_deplibs="$deplib $finalize_deplibs"
+           fi
+           ;;
+         *)
+           $echo "$modename: warning: \`-L' is ignored for archives/objects: $deplib" 1>&2
+           ;;
+         esac # linkmode
+         continue
+         ;; # -L
+       -R*)
+         if test $pass = link; then
+           dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'`
+           # Make sure the xrpath contains only unique directories.
+           case "$xrpath " in
+           *" $dir "*) ;;
+           *) xrpath="$xrpath $dir" ;;
+           esac
+         fi
+         deplibs="$deplib $deplibs"
+         continue
+         ;;
+       *.la) lib="$deplib" ;;
+       *.$libext)
+         if test $pass = conv; then
+           deplibs="$deplib $deplibs"
+           continue
+         fi
+         case $linkmode in
+         lib)
+           if test "$deplibs_check_method" != pass_all; then
+             echo
+             echo "*** Warning: Trying to link with static lib archive $deplib."
+             echo "*** I have the capability to make that library automatically link in when"
+             echo "*** you link to this library.  But I can only do this if you have a"
+             echo "*** shared version of the library, which you do not appear to have"
+             echo "*** because the file extensions .$libext of this argument makes me believe"
+             echo "*** that it is just a static archive that I should not used here."
+           else
+             echo
+             echo "*** Warning: Linking the shared library $output against the"
+             echo "*** static library $deplib is not portable!"
+             deplibs="$deplib $deplibs"
+           fi
+           continue
+           ;;
+         prog)
+           if test $pass != link; then
+             deplibs="$deplib $deplibs"
+           else
+             compile_deplibs="$deplib $compile_deplibs"
+             finalize_deplibs="$deplib $finalize_deplibs"
+           fi
+           continue
+           ;;
+         esac # linkmode
+         ;; # *.$libext
+       *.lo | *.$objext)
+         if test $pass = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+           # If there is no dlopen support or we're linking statically,
+           # we need to preload.
+           newdlprefiles="$newdlprefiles $deplib"
+           compile_deplibs="$deplib $compile_deplibs"
+           finalize_deplibs="$deplib $finalize_deplibs"
+         else
+           newdlfiles="$newdlfiles $deplib"
+         fi
+         continue
+         ;;
+       %DEPLIBS%)
+         alldeplibs=yes
+         continue
+         ;;
+       esac # case $deplib
+       if test $found = yes || test -f "$lib"; then :
+       else
+         $echo "$modename: cannot find the library \`$lib'" 1>&2
+         exit 1
+       fi
+
+       # Check to see that this really is a libtool archive.
+       if (${SED} -e '2q' $lib | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+       else
+         $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+         exit 1
+       fi
+
+       ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'`
+       test "X$ladir" = "X$lib" && ladir="."
+
+       dlname=
+       dlopen=
+       dlpreopen=
+       libdir=
+       library_names=
+       old_library=
+       # If the library was installed with an old release of libtool,
+       # it will not redefine variable installed.
+       installed=yes
+
+       # Read the .la file
+       case $lib in
+       */* | *\\*) . $lib ;;
+       *) . ./$lib ;;
+       esac
+
+       # Inherit the dlopen/dlpreopen lists recorded in the .la file just read:
+       # on the link pass of a library, the scan pass of a program, or whenever
+       # the output is an archive or an object (|| here: $linkmode cannot be
+       # both oldlib and obj at once).
+       if test "$linkmode,$pass" = "lib,link" ||
+          test "$linkmode,$pass" = "prog,scan" ||
+          { test $linkmode = oldlib || test $linkmode = obj; }; then
+          # Add dl[pre]opened files of deplib
+         test -n "$dlopen" && dlfiles="$dlfiles $dlopen"
+         test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen"
+       fi
+
+       if test $pass = conv; then
+         # Only check for convenience libraries
+         deplibs="$lib $deplibs"
+         if test -z "$libdir"; then
+           if test -z "$old_library"; then
+             $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+             exit 1
+           fi
+           # It is a libtool convenience library, so add in its objects.
+           convenience="$convenience $ladir/$objdir/$old_library"
+           old_convenience="$old_convenience $ladir/$objdir/$old_library"
+           tmp_libs=
+           for deplib in $dependency_libs; do
+             deplibs="$deplib $deplibs"
+              if test "X$duplicate_deps" = "Xyes" ; then
+               case "$tmp_libs " in
+               *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+               esac
+              fi
+             tmp_libs="$tmp_libs $deplib"
+           done
+         elif test $linkmode != prog && test $linkmode != lib; then
+           $echo "$modename: \`$lib' is not a convenience library" 1>&2
+           exit 1
+         fi
+         continue
+       fi # $pass = conv
+
+       # Get the name of the library we link against.
+       linklib=
+       for l in $old_library $library_names; do # $old_library first, then each name in $library_names
+         linklib="$l" # overwritten on every iteration, so the last listed name wins
+       done
+       if test -z "$linklib"; then # neither variable supplied a name
+         $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+         exit 1
+       fi
+
+       # This library was specified with -dlopen.
+       if test $pass = dlopen; then
+         if test -z "$libdir"; then
+           $echo "$modename: cannot -dlopen a convenience library: \`$lib'" 1>&2
+           exit 1
+         fi
+         if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+           # If there is no dlname, no dlopen support or we're linking
+           # statically, we need to preload.
+           dlprefiles="$dlprefiles $lib"
+         else
+           newdlfiles="$newdlfiles $lib"
+         fi
+         continue
+       fi # $pass = dlopen
+
+       # We need an absolute path.
+       case $ladir in
+       [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
+       *)
+         abs_ladir=`cd "$ladir" && pwd`
+         if test -z "$abs_ladir"; then
+           $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2
+           $echo "$modename: passing it literally to the linker, although it might fail" 1>&2
+           abs_ladir="$ladir"
+         fi
+         ;;
+       esac
+       laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+
+       # Find the relevant object directory and library name.
+       if test "X$installed" = Xyes; then
+         if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+           $echo "$modename: warning: library \`$lib' was moved." 1>&2
+           dir="$ladir"
+           absdir="$abs_ladir"
+           libdir="$abs_ladir"
+         else
+           dir="$libdir"
+           absdir="$libdir"
+         fi
+       else
+         dir="$ladir/$objdir"
+         absdir="$abs_ladir/$objdir"
+         # Remove this search path later
+         notinst_path="$notinst_path $abs_ladir"
+       fi # $installed = yes
+       name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+
+       # This library was specified with -dlpreopen.
+       if test $pass = dlpreopen; then
+         if test -z "$libdir"; then
+           $echo "$modename: cannot -dlpreopen a convenience library: \`$lib'" 1>&2
+           exit 1
+         fi
+         # Prefer using a static library (so that no silly _DYNAMIC symbols
+         # are required to link).
+         if test -n "$old_library"; then
+           newdlprefiles="$newdlprefiles $dir/$old_library"
+         # Otherwise, use the dlname, so that lt_dlopen finds it.
+         elif test -n "$dlname"; then
+           newdlprefiles="$newdlprefiles $dir/$dlname"
+         else
+           newdlprefiles="$newdlprefiles $dir/$linklib"
+         fi
+       fi # $pass = dlpreopen
+
+       if test -z "$libdir"; then
+         # Link the convenience library
+         if test $linkmode = lib; then
+           deplibs="$dir/$old_library $deplibs"
+         elif test "$linkmode,$pass" = "prog,link"; then
+           compile_deplibs="$dir/$old_library $compile_deplibs"
+           finalize_deplibs="$dir/$old_library $finalize_deplibs"
+         else
+           deplibs="$lib $deplibs"
+         fi
+         continue
+       fi
+
+       if test $linkmode = prog && test $pass != link; then
+         newlib_search_path="$newlib_search_path $ladir"
+         deplibs="$lib $deplibs"
+
+         linkalldeplibs=no
+         if test "$link_all_deplibs" != no || test -z "$library_names" ||
+            test "$build_libtool_libs" = no; then
+           linkalldeplibs=yes
+         fi
+
+         tmp_libs=
+         for deplib in $dependency_libs; do
+           case $deplib in
+           -L*) newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test
+           esac
+           # Need to link against all dependency_libs?
+           if test $linkalldeplibs = yes; then
+             deplibs="$deplib $deplibs"
+           else
+             # Need to hardcode shared library paths
+             # or/and link against static libraries
+             newdependency_libs="$deplib $newdependency_libs"
+           fi
+           if test "X$duplicate_deps" = "Xyes" ; then
+             case "$tmp_libs " in
+             *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+             esac
+           fi
+           tmp_libs="$tmp_libs $deplib"
+         done # for deplib
+         continue
+       fi # $linkmode = prog...
+
+       link_static=no # Whether the deplib will be linked statically
+       if test -n "$library_names" &&
+          { test "$prefer_static_libs" = no || test -z "$old_library"; }; then
+         # Link against this shared library
+
+         if test "$linkmode,$pass" = "prog,link" ||
+          { test $linkmode = lib && test $hardcode_into_libs = yes; }; then
+           # Hardcode the library path.
+           # Skip directories that are in the system default run-time
+           # search path.
+           case " $sys_lib_dlsearch_path " in
+           *" $absdir "*) ;;
+           *)
+             case "$compile_rpath " in
+             *" $absdir "*) ;;
+             *) compile_rpath="$compile_rpath $absdir"
+             esac
+             ;;
+           esac
+           case " $sys_lib_dlsearch_path " in
+           *" $libdir "*) ;;
+           *)
+             case "$finalize_rpath " in
+             *" $libdir "*) ;;
+             *) finalize_rpath="$finalize_rpath $libdir"
+             esac
+             ;;
+           esac
+           if test $linkmode = prog; then
+             # We need to hardcode the library path
+             if test -n "$shlibpath_var"; then
+               # Make sure the rpath contains only unique directories.
+               case "$temp_rpath " in
+               *" $dir "*) ;;
+               *" $absdir "*) ;;
+               *) temp_rpath="$temp_rpath $dir" ;;
+               esac
+             fi
+           fi
+         fi # $linkmode,$pass = prog,link...
+
+         if test "$alldeplibs" = yes &&
+            { test "$deplibs_check_method" = pass_all ||
+              { test "$build_libtool_libs" = yes &&
+                test -n "$library_names"; }; }; then
+           # We only need to search for static libraries
+           continue
+         fi
+
+         if test "$installed" = no; then
+           notinst_deplibs="$notinst_deplibs $lib"
+           need_relink=yes
+         fi
+
+         if test -n "$old_archive_from_expsyms_cmds"; then
+           # figure out the soname
+           set dummy $library_names
+           realname="$2"
+           shift; shift
+           libname=`eval \\$echo \"$libname_spec\"`
+           # use dlname if we got it. it's perfectly good, no?
+           if test -n "$dlname"; then
+             soname="$dlname"
+           elif test -n "$soname_spec"; then
+             # bleh windows
+             case $host in
+             *cygwin*)
+               major=`expr $current - $age`
+               versuffix="-$major"
+               ;;
+             esac
+             eval soname=\"$soname_spec\"
+           else
+             soname="$realname"
+           fi
+
+           # Make a new name for the extract_expsyms_cmds to use
+           soroot="$soname"
+           soname=`echo $soroot | ${SED} -e 's/^.*\///'`
+           newlib="libimp-`echo $soname | ${SED} 's/^lib//;s/\.dll$//'`.a"
+
+           # If the library has no export list, then create one now
+           if test -f "$output_objdir/$soname-def"; then :
+           else
+             $show "extracting exported symbol list from \`$soname'"
+             save_ifs="$IFS"; IFS='~'
+             eval cmds=\"$extract_expsyms_cmds\"
+             for cmd in $cmds; do
+               IFS="$save_ifs"
+               $show "$cmd"
+               $run eval "$cmd" || exit $?
+             done
+             IFS="$save_ifs"
+           fi
+
+           # Create $newlib
+           if test -f "$output_objdir/$newlib"; then :; else
+             $show "generating import library for \`$soname'"
+             save_ifs="$IFS"; IFS='~'
+             eval cmds=\"$old_archive_from_expsyms_cmds\"
+             for cmd in $cmds; do
+               IFS="$save_ifs"
+               $show "$cmd"
+               $run eval "$cmd" || exit $?
+             done
+             IFS="$save_ifs"
+           fi
+           # make sure the library variables are pointing to the new library
+           dir=$output_objdir
+           linklib=$newlib
+         fi # test -n $old_archive_from_expsyms_cmds
+
+         if test $linkmode = prog || test "$mode" != relink; then
+           add_shlibpath=
+           add_dir=
+           add=
+           lib_linked=yes
+           case $hardcode_action in
+           immediate | unsupported)
+             if test "$hardcode_direct" = no; then
+               add="$dir/$linklib"
+             elif test "$hardcode_minus_L" = no; then
+               case $host in
+               *-*-sunos*) add_shlibpath="$dir" ;;
+               esac
+               add_dir="-L$dir"
+               add="-l$name"
+             elif test "$hardcode_shlibpath_var" = no; then
+               add_shlibpath="$dir"
+               add="-l$name"
+             else
+               lib_linked=no
+             fi
+             ;;
+           relink)
+             if test "$hardcode_direct" = yes; then
+               add="$dir/$linklib"
+             elif test "$hardcode_minus_L" = yes; then
+               add_dir="-L$dir"
+               add="-l$name"
+             elif test "$hardcode_shlibpath_var" = yes; then
+               add_shlibpath="$dir"
+               add="-l$name"
+             else
+               lib_linked=no
+             fi
+             ;;
+           *) lib_linked=no ;;
+           esac
+
+           # None of the hardcode_* branches above produced a way to link the
+           # library.  This is fatal; report it on stderr like the other
+           # $modename diagnostics.
+           if test "$lib_linked" != yes; then
+             $echo "$modename: configuration error: unsupported hardcode properties" 1>&2
+             exit 1
+           fi
+
+           if test -n "$add_shlibpath"; then
+             case :$compile_shlibpath: in
+             *":$add_shlibpath:"*) ;;
+             *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;;
+             esac
+           fi
+           if test $linkmode = prog; then
+             test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
+             test -n "$add" && compile_deplibs="$add $compile_deplibs"
+           else
+             test -n "$add_dir" && deplibs="$add_dir $deplibs"
+             test -n "$add" && deplibs="$add $deplibs"
+             if test "$hardcode_direct" != yes && \
+                test "$hardcode_minus_L" != yes && \
+                test "$hardcode_shlibpath_var" = yes; then
+               case :$finalize_shlibpath: in
+               *":$libdir:"*) ;;
+               *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+               esac
+             fi
+           fi
+         fi
+
+         if test $linkmode = prog || test "$mode" = relink; then
+           add_shlibpath=
+           add_dir=
+           add=
+           # Finalize command for both is simple: just hardcode it.
+           if test "$hardcode_direct" = yes; then
+             add="$libdir/$linklib"
+           elif test "$hardcode_minus_L" = yes; then
+             add_dir="-L$libdir"
+             add="-l$name"
+           elif test "$hardcode_shlibpath_var" = yes; then
+             case :$finalize_shlibpath: in
+             *":$libdir:"*) ;;
+             *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+             esac
+             add="-l$name"
+           else
+             # We cannot seem to hardcode it, guess we'll fake it.
+             add_dir="-L$libdir"
+             add="-l$name"
+           fi
+
+           if test $linkmode = prog; then
+             test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
+             test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
+           else
+             test -n "$add_dir" && deplibs="$add_dir $deplibs"
+             test -n "$add" && deplibs="$add $deplibs"
+           fi
+         fi
+       elif test $linkmode = prog; then
+         if test "$alldeplibs" = yes &&
+            { test "$deplibs_check_method" = pass_all ||
+              { test "$build_libtool_libs" = yes &&
+                test -n "$library_names"; }; }; then
+           # We only need to search for static libraries
+           continue
+         fi
+
+         # Try to link the static library
+         # Here we assume that one of hardcode_direct or hardcode_minus_L
+         # is not unsupported.  This is valid on all known static and
+         # shared platforms.
+         if test "$hardcode_direct" != unsupported; then
+           test -n "$old_library" && linklib="$old_library"
+           compile_deplibs="$dir/$linklib $compile_deplibs"
+           finalize_deplibs="$dir/$linklib $finalize_deplibs"
+         else
+           compile_deplibs="-l$name -L$dir $compile_deplibs"
+           finalize_deplibs="-l$name -L$dir $finalize_deplibs"
+         fi
+       elif test "$build_libtool_libs" = yes; then
+         # Not a shared library
+         if test "$deplibs_check_method" != pass_all; then
+           # We're trying to link a shared library against a static one
+           # but the system doesn't support it.
+
+           # Just print a warning and add the library to dependency_libs so
+           # that the program can be linked against the static library.
+           echo
+           echo "*** Warning: This system can not link to static lib archive $lib."
+           echo "*** I have the capability to make that library automatically link in when"
+           echo "*** you link to this library.  But I can only do this if you have a"
+           echo "*** shared version of the library, which you do not appear to have."
+           if test "$module" = yes; then
+             echo "*** But as you try to build a module library, libtool will still create "
+             echo "*** a static module, that should work as long as the dlopening application"
+             echo "*** is linked with the -dlopen flag to resolve symbols at runtime."
+             if test -z "$global_symbol_pipe"; then
+               echo
+               echo "*** However, this would only work if libtool was able to extract symbol"
+               echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+               echo "*** not find such a program.  So, this module is probably useless."
+               echo "*** \`nm' from GNU binutils and a full rebuild may help."
+             fi
+             if test "$build_old_libs" = no; then
+               build_libtool_libs=module
+               build_old_libs=yes
+             else
+               build_libtool_libs=no
+             fi
+           fi
+         else
+           convenience="$convenience $dir/$old_library"
+           old_convenience="$old_convenience $dir/$old_library"
+           deplibs="$dir/$old_library $deplibs"
+           link_static=yes
+         fi
+       fi # link shared/static library?
+
+       if test $linkmode = lib; then
+         if test -n "$dependency_libs" &&
+            { test $hardcode_into_libs != yes || test $build_old_libs = yes ||
+              test $link_static = yes; }; then
+           # Extract -R from dependency_libs
+           temp_deplibs=
+           for libdir in $dependency_libs; do
+             case $libdir in
+             -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'`
+                  case " $xrpath " in
+                  *" $temp_xrpath "*) ;;
+                  *) xrpath="$xrpath $temp_xrpath";;
+                  esac;;
+             *) temp_deplibs="$temp_deplibs $libdir";;
+             esac
+           done
+           dependency_libs="$temp_deplibs"
+         fi
+
+         newlib_search_path="$newlib_search_path $absdir"
+         # Link against this library
+         test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
+         # ... and its dependency_libs
+         tmp_libs=
+         for deplib in $dependency_libs; do
+           newdependency_libs="$deplib $newdependency_libs"
+           if test "X$duplicate_deps" = "Xyes" ; then
+             case "$tmp_libs " in
+             *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+             esac
+           fi
+           tmp_libs="$tmp_libs $deplib"
+         done
+
+         if test $link_all_deplibs != no; then
+           # Add the search paths of all dependency libraries
+           for deplib in $dependency_libs; do
+             case $deplib in
+             -L*) path="$deplib" ;;
+             *.la)
+               dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'`
+               test "X$dir" = "X$deplib" && dir="."
+               # We need an absolute path.
+               case $dir in
+               [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
+               *)
+                 absdir=`cd "$dir" && pwd`
+                 if test -z "$absdir"; then
+                   $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2
+                   absdir="$dir"
+                 fi
+                 ;;
+               esac
+               if grep "^installed=no" $deplib > /dev/null; then
+                 path="-L$absdir/$objdir"
+               else
+                 eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+                 if test -z "$libdir"; then
+                   $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+                   exit 1
+                 fi
+                 if test "$absdir" != "$libdir"; then
+                   $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2
+                 fi
+                 path="-L$absdir"
+               fi
+               ;;
+             *) continue ;;
+             esac
+             case " $deplibs " in
+             *" $path "*) ;;
+             *) deplibs="$deplibs $path" ;;
+             esac
+           done
+         fi # link_all_deplibs != no
+       fi # linkmode = lib
+      done # for deplib in $libs
+      if test $pass = dlpreopen; then
+       # Link the dlpreopened libraries before other libraries
+       for deplib in $save_deplibs; do
+         deplibs="$deplib $deplibs"
+       done
+      fi
+      if test $pass != dlopen; then
+       test $pass != scan && dependency_libs="$newdependency_libs"
+       if test $pass != conv; then
+         # Make sure lib_search_path contains only unique directories.
+         lib_search_path=
+         for dir in $newlib_search_path; do
+           case "$lib_search_path " in
+           *" $dir "*) ;;
+           *) lib_search_path="$lib_search_path $dir" ;;
+           esac
+         done
+         newlib_search_path=
+       fi
+
+       if test "$linkmode,$pass" != "prog,link"; then
+         vars="deplibs"
+       else
+         vars="compile_deplibs finalize_deplibs"
+       fi
+       for var in $vars dependency_libs; do
+         # Add libraries to $var in reverse order
+         eval tmp_libs=\"\$$var\"
+         new_libs=
+         for deplib in $tmp_libs; do
+           case $deplib in
+           -L*) new_libs="$deplib $new_libs" ;;
+           *)
+             case " $specialdeplibs " in
+             *" $deplib "*) new_libs="$deplib $new_libs" ;;
+             *)
+               case " $new_libs " in
+               *" $deplib "*) ;;
+               *) new_libs="$deplib $new_libs" ;;
+               esac
+               ;;
+             esac
+             ;;
+           esac
+         done
+         tmp_libs=
+         for deplib in $new_libs; do
+           case $deplib in
+           -L*)
+             case " $tmp_libs " in
+             *" $deplib "*) ;;
+             *) tmp_libs="$tmp_libs $deplib" ;;
+             esac
+             ;;
+           *) tmp_libs="$tmp_libs $deplib" ;;
+           esac
+         done
+         eval $var=\"$tmp_libs\"
+       done # for var
+      fi
+      if test "$pass" = "conv" &&
+       { test "$linkmode" = "lib" || test "$linkmode" = "prog"; }; then
+       libs="$deplibs" # reset libs
+       deplibs=
+      fi
+    done # for pass
+    if test $linkmode = prog; then
+      dlfiles="$newdlfiles"
+      dlprefiles="$newdlprefiles"
+    fi
+
+    case $linkmode in
+    oldlib)
+      if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+       $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2
+      fi
+
+      if test -n "$rpath"; then
+       $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2
+      fi
+
+      if test -n "$xrpath"; then
+       $echo "$modename: warning: \`-R' is ignored for archives" 1>&2
+      fi
+
+      if test -n "$vinfo"; then
+       $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2
+      fi
+
+      if test -n "$release"; then
+       $echo "$modename: warning: \`-release' is ignored for archives" 1>&2
+      fi
+
+      if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+       $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2
+      fi
 
-    *.a)
       # Now set the variables for building old libraries.
       build_libtool_libs=no
-      build_old_libs=yes
-      oldlib="$output"
-      $show "$rm $oldlib"
-      $run $rm $oldlib
+      oldlibs="$output"
+      objs="$objs$old_deplibs"
       ;;
 
-    *.la)
+    lib)
       # Make sure we only generate libraries of the form `libNAME.la'.
-      case "$output" in
-      lib*) ;;
+      case $outputname in
+      lib*)
+       name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+       eval libname=\"$libname_spec\"
+       ;;
       *)
-       $echo "$modename: libtool library \`$arg' must begin with \`lib'" 1>&2
-       $echo "$help" 1>&2
-       exit 1
+       if test "$module" = no; then
+         $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2
+         $echo "$help" 1>&2
+         exit 1
+       fi
+       if test "$need_lib_prefix" != no; then
+         # Add the "lib" prefix for modules if required
+         name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+         eval libname=\"$libname_spec\"
+       else
+         libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+       fi
        ;;
       esac
 
-      name=`$echo "X$output" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
-      eval libname=\"$libname_spec\"
+      if test -n "$objs"; then
+       if test "$deplibs_check_method" != pass_all; then
+         $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1
+         exit 1
+       else
+         echo
+         echo "*** Warning: Linking the shared library $output against the non-libtool"
+         echo "*** objects $objs is not portable!"
+         libobjs="$libobjs $objs"
+       fi
+      fi
+
+      if test "$dlself" != no; then
+       $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2
+      fi
+
+      set dummy $rpath
+      if test $# -gt 2; then
+       $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2
+      fi
+      install_libdir="$2"
+
+      oldlibs=
+      if test -z "$rpath"; then
+       if test "$build_libtool_libs" = yes; then
+         # Building a libtool convenience library.
+         libext=al
+         oldlibs="$output_objdir/$libname.$libext $oldlibs"
+         build_libtool_libs=convenience
+         build_old_libs=yes
+       fi
+
+       if test -n "$vinfo"; then
+         $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2
+       fi
+
+       if test -n "$release"; then
+         $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2
+       fi
+      else
+
+       # Parse the version information argument.
+       save_ifs="$IFS"; IFS=':'
+       set dummy $vinfo 0 0 0
+       IFS="$save_ifs"
+
+       if test -n "$8"; then
+         $echo "$modename: too many parameters to \`-version-info'" 1>&2
+         $echo "$help" 1>&2
+         exit 1
+       fi
+
+       current="$2"
+       revision="$3"
+       age="$4"
+
+       # Check that each of CURRENT, REVISION and AGE is a valid number.
+       case $current in
+       0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;;
+       *)
+         $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2
+         $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+         exit 1
+         ;;
+       esac
+
+       case $revision in
+       0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;;
+       *)
+         $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2
+         $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+         exit 1
+         ;;
+       esac
+
+       case $age in
+       0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;;
+       *)
+         $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2
+         $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+         exit 1
+         ;;
+       esac
+
+       if test $age -gt $current; then
+         $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2
+         $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+         exit 1
+       fi
+
+       # Calculate the version variables.
+       major=
+       versuffix=
+       verstring=
+       case $version_type in
+       none) ;;
+
+       darwin)
+         # Like Linux, but with the current version available in
+         # verstring for coding it into the library header
+         major=.`expr $current - $age`
+         versuffix="$major.$age.$revision"
+         # Darwin ld doesn't like 0 for these options...
+         minor_current=`expr $current + 1`
+         verstring="-compatibility_version $minor_current -current_version $minor_current.$revision"
+         ;;
+
+       freebsd-aout)
+         major=".$current"
+         versuffix=".$current.$revision";
+         ;;
+
+       freebsd-elf)
+         major=".$current"
+         versuffix=".$current";
+         ;;
+
+       irix | nonstopux)
+         major=`expr $current - $age + 1`
+
+         case $version_type in
+           nonstopux) verstring_prefix=nonstopux ;;
+           *)         verstring_prefix=sgi ;;
+         esac
+         verstring="$verstring_prefix$major.$revision"
+
+         # Add in all the interfaces that we are compatible with.
+         loop=$revision
+         while test $loop != 0; do
+           iface=`expr $revision - $loop`
+           loop=`expr $loop - 1`
+           verstring="$verstring_prefix$major.$iface:$verstring"
+         done
+
+         # Before this point, $major must not contain `.'.
+         major=.$major
+         versuffix="$major.$revision"
+         ;;
+
+       linux)
+         major=.`expr $current - $age`
+         versuffix="$major.$age.$revision"
+         ;;
+
+       osf)
+         major=.`expr $current - $age`
+         versuffix=".$current.$age.$revision"
+         verstring="$current.$age.$revision"
+
+         # Add in all the interfaces that we are compatible with.
+         loop=$age
+         while test $loop != 0; do
+           iface=`expr $current - $loop`
+           loop=`expr $loop - 1`
+           verstring="$verstring:${iface}.0"
+         done
+
+         # Make executables depend on our current version.
+         verstring="$verstring:${current}.0"
+         ;;
+
+       sunos)
+         major=".$current"
+         versuffix=".$current.$revision"
+         ;;
+
+       windows)
+         # Use '-' rather than '.', since we only want one
+         # extension on DOS 8.3 filesystems.
+         major=`expr $current - $age`
+         versuffix="-$major"
+         ;;
+
+       *)
+         $echo "$modename: unknown library version type \`$version_type'" 1>&2
+         echo "Fatal configuration error.  See the $PACKAGE docs for more information." 1>&2
+         exit 1
+         ;;
+       esac
+
+       # Clear the version info if we defaulted, and they specified a release.
+       if test -z "$vinfo" && test -n "$release"; then
+         major=
+         verstring="0.0"
+         case $version_type in
+         darwin)
+           # we can't check for "0.0" in archive_cmds due to quoting
+           # problems, so we reset it completely
+           verstring=""
+           ;;
+         *)
+           verstring="0.0"
+           ;;
+         esac
+         if test "$need_version" = no; then
+           versuffix=
+         else
+           versuffix=".0.0"
+         fi
+       fi
+
+       # Remove version info from name if versioning should be avoided
+       if test "$avoid_version" = yes && test "$need_version" = no; then
+         major=
+         versuffix=
+         verstring=""
+       fi
+
+       # Check to see if the archive will have undefined symbols.
+       if test "$allow_undefined" = yes; then
+         if test "$allow_undefined_flag" = unsupported; then
+           $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2
+           build_libtool_libs=no
+           build_old_libs=yes
+         fi
+       else
+         # Don't allow undefined symbols.
+         allow_undefined_flag="$no_undefined_flag"
+       fi
+      fi
+
+      if test "$mode" != relink; then
+       # Remove our outputs.
+       $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*"
+       $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*
+      fi
+
+      # Now set the variables for building old libraries.
+      if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
+       oldlibs="$oldlibs $output_objdir/$libname.$libext"
+
+       # Transform .lo files to .o files.
+       oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP`
+      fi
+
+      # Eliminate all temporary directories.
+      for path in $notinst_path; do
+       lib_search_path=`echo "$lib_search_path " | ${SED} -e 's% $path % %g'`
+       deplibs=`echo "$deplibs " | ${SED} -e 's% -L$path % %g'`
+       dependency_libs=`echo "$dependency_libs " | ${SED} -e 's% -L$path % %g'`
+      done
+
+      if test -n "$xrpath"; then
+       # If the user specified any rpath flags, then add them.
+       temp_xrpath=
+       for libdir in $xrpath; do
+         temp_xrpath="$temp_xrpath -R$libdir"
+         case "$finalize_rpath " in
+         *" $libdir "*) ;;
+         *) finalize_rpath="$finalize_rpath $libdir" ;;
+         esac
+       done
+       if test $hardcode_into_libs != yes || test $build_old_libs = yes; then
+         dependency_libs="$temp_xrpath $dependency_libs"
+       fi
+      fi
+
+      # Make sure dlfiles contains only unique files that won't be dlpreopened
+      old_dlfiles="$dlfiles"
+      dlfiles=
+      for lib in $old_dlfiles; do
+       case " $dlprefiles $dlfiles " in
+       *" $lib "*) ;;
+       *) dlfiles="$dlfiles $lib" ;;
+       esac
+      done
+
+      # Make sure dlprefiles contains only unique files
+      old_dlprefiles="$dlprefiles"
+      dlprefiles=
+      for lib in $old_dlprefiles; do
+       case "$dlprefiles " in
+       *" $lib "*) ;;
+       *) dlprefiles="$dlprefiles $lib" ;;
+       esac
+      done
+
+      if test "$build_libtool_libs" = yes; then
+       if test -n "$rpath"; then
+         case $host in
+         *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos*)
+           # these systems don't actually have a c library (as such)!
+           ;;
+         *-*-rhapsody* | *-*-darwin1.[012])
+           # Rhapsody C library is in the System framework
+           deplibs="$deplibs -framework System"
+           ;;
+         *-*-netbsd*)
+           # Don't link with libc until the a.out ld.so is fixed.
+           ;;
+         *-*-openbsd* | *-*-freebsd*)
+           # Do not include libc due to us having libc/libc_r.
+           ;;
+         *)
+           # Add libc to deplibs on all other systems if necessary.
+           if test $build_libtool_need_lc = "yes"; then
+             deplibs="$deplibs -lc"
+           fi
+           ;;
+         esac
+       fi
+
+       # Transform deplibs into only deplibs that can be linked in shared.
+       name_save=$name
+       libname_save=$libname
+       release_save=$release
+       versuffix_save=$versuffix
+       major_save=$major
+       # I'm not sure if I'm treating the release correctly.  I think
+       # release should show up in the -l (ie -lgmp5) so we don't want to
+       # add it in twice.  Is that correct?
+       release=""
+       versuffix=""
+       major=""
+       newdeplibs=
+       droppeddeps=no
+       case $deplibs_check_method in
+       pass_all)
+         # Don't check for shared/static.  Everything works.
+         # This might be a little naive.  We might want to check
+         # whether the library exists or not.  But this is on
+         # osf3 & osf4 and I'm not really sure... Just
+         # implementing what was already the behaviour.
+         newdeplibs=$deplibs
+         ;;
+       test_compile)
+         # This code stresses the "libraries are programs" paradigm to its
+         # limits. Maybe even breaks it.  We compile a program, linking it
+         # against the deplibs as a proxy for the library.  Then we can check
+         # whether they linked in statically or dynamically with ldd.
+         $rm conftest.c
+         cat > conftest.c <<EOF
+         int main() { return 0; }
+EOF
+         $rm conftest
+         $CC -o conftest conftest.c $deplibs
+         if test $? -eq 0 ; then
+           ldd_output=`ldd conftest`
+           for i in $deplibs; do
+             name="`expr $i : '-l\(.*\)'`"
+             # If $name is empty we are operating on a -L argument.
+             if test -n "$name" && test "$name" != "0"; then
+               libname=`eval \\$echo \"$libname_spec\"`
+               deplib_matches=`eval \\$echo \"$library_names_spec\"`
+               set dummy $deplib_matches
+               deplib_match=$2
+               if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+                 newdeplibs="$newdeplibs $i"
+               else
+                 droppeddeps=yes
+                 echo
+                 echo "*** Warning: dynamic linker does not accept needed library $i."
+                 echo "*** I have the capability to make that library automatically link in when"
+                 echo "*** you link to this library.  But I can only do this if you have a"
+                 echo "*** shared version of the library, which I believe you do not have"
+                 echo "*** because a test_compile did reveal that the linker did not use it for"
+                 echo "*** its dynamic dependency list that programs get resolved with at runtime."
+               fi
+             else
+               newdeplibs="$newdeplibs $i"
+             fi
+           done
+         else
+           # Error occurred in the first compile.  Let's try to salvage
+           # the situation: Compile a separate program for each library.
+           for i in $deplibs; do
+             name="`expr $i : '-l\(.*\)'`"
+            # If $name is empty we are operating on a -L argument.
+             if test -n "$name" && test "$name" != "0"; then
+               $rm conftest
+               $CC -o conftest conftest.c $i
+               # Did it work?
+               if test $? -eq 0 ; then
+                 ldd_output=`ldd conftest`
+                 libname=`eval \\$echo \"$libname_spec\"`
+                 deplib_matches=`eval \\$echo \"$library_names_spec\"`
+                 set dummy $deplib_matches
+                 deplib_match=$2
+                 if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+                   newdeplibs="$newdeplibs $i"
+                 else
+                   droppeddeps=yes
+                   echo
+                   echo "*** Warning: dynamic linker does not accept needed library $i."
+                   echo "*** I have the capability to make that library automatically link in when"
+                   echo "*** you link to this library.  But I can only do this if you have a"
+                   echo "*** shared version of the library, which you do not appear to have"
+                   echo "*** because a test_compile did reveal that the linker did not use this one"
+                   echo "*** as a dynamic dependency that programs can get resolved with at runtime."
+                 fi
+               else
+                 droppeddeps=yes
+                 echo
+                 echo "*** Warning!  Library $i is needed by this library but I was not able to"
+                 echo "***  make it link in!  You will probably need to install it or some"
+                 echo "*** library that it depends on before this library will be fully"
+                 echo "*** functional.  Installing it before continuing would be even better."
+               fi
+             else
+               newdeplibs="$newdeplibs $i"
+             fi
+           done
+         fi
+         ;;
+       file_magic*)
+         set dummy $deplibs_check_method
+         file_magic_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"`
+         for a_deplib in $deplibs; do
+           name="`expr $a_deplib : '-l\(.*\)'`"
+           # If $name is empty we are operating on a -L argument.
+           if test -n "$name" && test "$name" != "0"; then
+             libname=`eval \\$echo \"$libname_spec\"`
+             for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+                   potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+                   for potent_lib in $potential_libs; do
+                     # Follow soft links.
+                     if ls -lLd "$potent_lib" 2>/dev/null \
+                        | grep " -> " >/dev/null; then
+                       continue
+                     fi
+                     # The statement above tries to avoid entering an
+                     # endless loop below, in case of cyclic links.
+                     # We might still enter an endless loop, since a link
+                     # loop can be closed while we follow links,
+                     # but so what?
+                     potlib="$potent_lib"
+                     while test -h "$potlib" 2>/dev/null; do
+                       potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'`
+                       case $potliblink in
+                       [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
+                       *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";;
+                       esac
+                     done
+                     if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \
+                        | ${SED} 10q \
+                        | egrep "$file_magic_regex" > /dev/null; then
+                       newdeplibs="$newdeplibs $a_deplib"
+                       a_deplib=""
+                       break 2
+                     fi
+                   done
+             done
+             if test -n "$a_deplib" ; then
+               droppeddeps=yes
+               echo
+               echo "*** Warning: linker path does not have real file for library $a_deplib."
+               echo "*** I have the capability to make that library automatically link in when"
+               echo "*** you link to this library.  But I can only do this if you have a"
+               echo "*** shared version of the library, which you do not appear to have"
+               echo "*** because I did check the linker path looking for a file starting"
+               if test -z "$potlib" ; then
+                 echo "*** with $libname but no candidates were found. (...for file magic test)"
+               else
+                 echo "*** with $libname and none of the candidates passed a file format test"
+                 echo "*** using a file magic. Last file checked: $potlib"
+               fi
+             fi
+           else
+             # Add a -L argument.
+             newdeplibs="$newdeplibs $a_deplib"
+           fi
+         done # Gone through all deplibs.
+         ;;
+       match_pattern*)
+         set dummy $deplibs_check_method
+         match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"`
+         for a_deplib in $deplibs; do
+           name="`expr $a_deplib : '-l\(.*\)'`"
+           # If $name is empty we are operating on a -L argument.
+           if test -n "$name" && test "$name" != "0"; then
+             libname=`eval \\$echo \"$libname_spec\"`
+             for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+               potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+               for potent_lib in $potential_libs; do
+                 potlib="$potent_lib" # see symlink-check above in file_magic test
+                 if eval echo \"$potent_lib\" 2>/dev/null \
+                     | ${SED} 10q \
+                     | egrep "$match_pattern_regex" > /dev/null; then
+                   newdeplibs="$newdeplibs $a_deplib"
+                   a_deplib=""
+                   break 2
+                 fi
+               done
+             done
+             if test -n "$a_deplib" ; then
+               droppeddeps=yes
+               echo
+               echo "*** Warning: linker path does not have real file for library $a_deplib."
+               echo "*** I have the capability to make that library automatically link in when"
+               echo "*** you link to this library.  But I can only do this if you have a"
+               echo "*** shared version of the library, which you do not appear to have"
+               echo "*** because I did check the linker path looking for a file starting"
+               if test -z "$potlib" ; then
+                 echo "*** with $libname but no candidates were found. (...for regex pattern test)"
+               else
+                 echo "*** with $libname and none of the candidates passed a file format test"
+                 echo "*** using a regex pattern. Last file checked: $potlib"
+               fi
+             fi
+           else
+             # Add a -L argument.
+             newdeplibs="$newdeplibs $a_deplib"
+           fi
+         done # Gone through all deplibs.
+         ;;
+       none | unknown | *)
+         newdeplibs=""
+         if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \
+              -e 's/ -[LR][^ ]*//g' -e 's/[    ]//g' |
+            grep . >/dev/null; then
+           echo
+           if test "X$deplibs_check_method" = "Xnone"; then
+             echo "*** Warning: inter-library dependencies are not supported in this platform."
+           else
+             echo "*** Warning: inter-library dependencies are not known to be supported."
+           fi
+           echo "*** All declared inter-library dependencies are being dropped."
+           droppeddeps=yes
+         fi
+         ;;
+       esac
+       versuffix=$versuffix_save
+       major=$major_save
+       release=$release_save
+       libname=$libname_save
+       name=$name_save
+
+       case $host in
+       *-*-rhapsody* | *-*-darwin1.[012])
+         # On Rhapsody replace the C library with the System framework
+         newdeplibs=`$echo "X $newdeplibs" | $Xsed -e 's/ -lc / -framework System /'`
+         ;;
+       esac
+
+       if test "$droppeddeps" = yes; then
+         if test "$module" = yes; then
+           echo
+           echo "*** Warning: libtool could not satisfy all declared inter-library"
+           echo "*** dependencies of module $libname.  Therefore, libtool will create"
+           echo "*** a static module, that should work as long as the dlopening"
+           echo "*** application is linked with the -dlopen flag."
+           if test -z "$global_symbol_pipe"; then
+             echo
+             echo "*** However, this would only work if libtool was able to extract symbol"
+             echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+             echo "*** not find such a program.  So, this module is probably useless."
+             echo "*** \`nm' from GNU binutils and a full rebuild may help."
+           fi
+           if test "$build_old_libs" = no; then
+             oldlibs="$output_objdir/$libname.$libext"
+             build_libtool_libs=module
+             build_old_libs=yes
+           else
+             build_libtool_libs=no
+           fi
+         else
+           echo "*** The inter-library dependencies that have been dropped here will be"
+           echo "*** automatically added whenever a program is linked with this library"
+           echo "*** or is declared to -dlopen it."
+
+           if test $allow_undefined = no; then
+             echo
+             echo "*** Since this library must not contain undefined symbols,"
+             echo "*** because either the platform does not support them or"
+             echo "*** it was explicitly requested with -no-undefined,"
+             echo "*** libtool will only create a static version of it."
+             if test "$build_old_libs" = no; then
+               oldlibs="$output_objdir/$libname.$libext"
+               build_libtool_libs=module
+               build_old_libs=yes
+             else
+               build_libtool_libs=no
+             fi
+           fi
+         fi
+       fi
+       # Done checking deplibs!
+       deplibs=$newdeplibs
+      fi
 
       # All the library-specific variables (install_libdir is set above).
       library_names=
       old_library=
       dlname=
-      current=0
-      revision=0
-      age=0
-
-      if test -n "$objs"; then
-        $echo "$modename: cannot build libtool library \`$output' from non-libtool objects:$objs" 2>&1
-        exit 1
-      fi
 
-      # How the heck are we supposed to write a wrapper for a shared library?
-      if test -n "$link_against_libtool_libs"; then
-        $echo "$modename: libtool library \`$output' may not depend on uninstalled libraries:$link_against_libtool_libs" 1>&2
-        exit 1
-      fi
-
-      if test -n "$dlfiles$dlprefiles"; then
-        $echo "$modename: warning: \`-dlopen' is ignored while creating libtool libraries" 1>&2
-        # Nullify the symbol file.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
-      fi
+      # Test again, we may have decided not to build it any more
+      if test "$build_libtool_libs" = yes; then
+       if test $hardcode_into_libs = yes; then
+         # Hardcode the library paths
+         hardcode_libdirs=
+         dep_rpath=
+         rpath="$finalize_rpath"
+         test "$mode" != relink && rpath="$compile_rpath$rpath"
+         for libdir in $rpath; do
+           if test -n "$hardcode_libdir_flag_spec"; then
+             if test -n "$hardcode_libdir_separator"; then
+               if test -z "$hardcode_libdirs"; then
+                 hardcode_libdirs="$libdir"
+               else
+                 # Just accumulate the unique libdirs.
+                 case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+                 *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+                   ;;
+                 *)
+                   hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+                   ;;
+                 esac
+               fi
+             else
+               eval flag=\"$hardcode_libdir_flag_spec\"
+               dep_rpath="$dep_rpath $flag"
+             fi
+           elif test -n "$runpath_var"; then
+             case "$perm_rpath " in
+             *" $libdir "*) ;;
+             *) perm_rpath="$perm_rpath $libdir" ;;
+             esac
+           fi
+         done
+         # Substitute the hardcoded libdirs into the rpath.
+         if test -n "$hardcode_libdir_separator" &&
+            test -n "$hardcode_libdirs"; then
+           libdir="$hardcode_libdirs"
+           eval dep_rpath=\"$hardcode_libdir_flag_spec\"
+         fi
+         if test -n "$runpath_var" && test -n "$perm_rpath"; then
+           # We should set the runpath_var.
+           rpath=
+           for dir in $perm_rpath; do
+             rpath="$rpath$dir:"
+           done
+           eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
+         fi
+         test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
+       fi
 
-      if test -z "$rpath"; then
-        $echo "$modename: you must specify an installation directory with \`-rpath'" 1>&2
-       $echo "$help" 1>&2
-        exit 1
-      fi
+       shlibpath="$finalize_shlibpath"
+       test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
+       if test -n "$shlibpath"; then
+         eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
+       fi
 
-      set dummy $rpath
-      if test $# -gt 2; then
-       $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2
-      fi
-      install_libdir="$2"
+       # Get the real and link names of the library.
+       eval library_names=\"$library_names_spec\"
+       set dummy $library_names
+       realname="$2"
+       shift; shift
 
-      # Parse the version information argument.
-      IFS="${IFS=      }"; save_ifs="$IFS"; IFS=':'
-      set dummy $vinfo
-      IFS="$save_ifs"
+       if test -n "$soname_spec"; then
+         eval soname=\"$soname_spec\"
+       else
+         soname="$realname"
+       fi
+       test -z "$dlname" && dlname=$soname
 
-      if test -n "$5"; then
-        $echo "$modename: too many parameters to \`-version-info'" 1>&2
-        $echo "$help" 1>&2
-        exit 1
-      fi
+       lib="$output_objdir/$realname"
+       for link
+       do
+         linknames="$linknames $link"
+       done
 
-      test -n "$2" && current="$2"
-      test -n "$3" && revision="$3"
-      test -n "$4" && age="$4"
+       # Ensure that we have .o objects for linkers which dislike .lo
+       # (e.g. aix) in case we are running --disable-static
+       for obj in $libobjs; do
+         xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'`
+         if test "X$xdir" = "X$obj"; then
+           xdir="."
+         else
+           xdir="$xdir"
+         fi
+         baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'`
+         oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"`
+         if test ! -f $xdir/$oldobj; then
+           $show "(cd $xdir && ${LN_S} $baseobj $oldobj)"
+           $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $?
+         fi
+       done
 
-      # Check that each of the things are valid numbers.
-      case "$current" in
-      0 | [1-9] | [1-9][0-9]*) ;;
-      *)
-        $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-        ;;
-      esac
+       # Use standard objects if they are pic
+       test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+
+       # Prepare the list of exported symbols
+       if test -z "$export_symbols"; then
+         if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
+           $show "generating symbol list for \`$libname.la'"
+           export_symbols="$output_objdir/$libname.exp"
+           $run $rm $export_symbols
+           eval cmds=\"$export_symbols_cmds\"
+           save_ifs="$IFS"; IFS='~'
+           for cmd in $cmds; do
+             IFS="$save_ifs"
+             $show "$cmd"
+             $run eval "$cmd" || exit $?
+           done
+           IFS="$save_ifs"
+           if test -n "$export_symbols_regex"; then
+             $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\""
+             $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+             $show "$mv \"${export_symbols}T\" \"$export_symbols\""
+             $run eval '$mv "${export_symbols}T" "$export_symbols"'
+           fi
+         fi
+       fi
 
-      case "$revision" in
-      0 | [1-9] | [1-9][0-9]*) ;;
-      *)
-        $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-        ;;
-      esac
+       if test -n "$export_symbols" && test -n "$include_expsyms"; then
+         $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"'
+       fi
 
-      case "$age" in
-      0 | [1-9] | [1-9][0-9]*) ;;
-      *)
-        $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-        ;;
-      esac
+       if test -n "$convenience"; then
+         if test -n "$whole_archive_flag_spec"; then
+           eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+         else
+           gentop="$output_objdir/${outputname}x"
+           $show "${rm}r $gentop"
+           $run ${rm}r "$gentop"
+           $show "mkdir $gentop"
+           $run mkdir "$gentop"
+           status=$?
+           if test $status -ne 0 && test ! -d "$gentop"; then
+             exit $status
+           fi
+           generated="$generated $gentop"
+
+           for xlib in $convenience; do
+             # Extract the objects.
+             case $xlib in
+             [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+             *) xabs=`pwd`"/$xlib" ;;
+             esac
+             xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+             xdir="$gentop/$xlib"
+
+             $show "${rm}r $xdir"
+             $run ${rm}r "$xdir"
+             $show "mkdir $xdir"
+             $run mkdir "$xdir"
+             status=$?
+             if test $status -ne 0 && test ! -d "$xdir"; then
+               exit $status
+             fi
+             $show "(cd $xdir && $AR x $xabs)"
+             $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+             libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP`
+           done
+         fi
+       fi
 
-      if test $age -gt $current; then
-        $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-      fi
-
-      # Calculate the version variables.
-      version_vars="version_type current age revision"
-      case "$version_type" in
-      none) ;;
-
-      linux)
-        version_vars="$version_vars major versuffix"
-        major=`expr $current - $age`
-        versuffix="$major.$age.$revision"
-        ;;
-
-      osf)
-        version_vars="$version_vars versuffix verstring"
-        major=`expr $current - $age`
-        versuffix="$current.$age.$revision"
-        verstring="$versuffix"
-
-        # Add in all the interfaces that we are compatible with.
-        loop=$age
-        while test $loop != 0; do
-          iface=`expr $current - $loop`
-          loop=`expr $loop - 1`
-          verstring="$verstring:${iface}.0"
-        done
-
-        # Make executables depend on our current version.
-        verstring="$verstring:${current}.0"
-        ;;
-
-      sunos)
-        version_vars="$version_vars major versuffix"
-        major="$current"
-        versuffix="$current.$revision"
-        ;;
+       if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
+         eval flag=\"$thread_safe_flag_spec\"
+         linker_flags="$linker_flags $flag"
+       fi
 
-      *)
-        $echo "$modename: unknown library version type \`$version_type'" 1>&2
-        echo "Fatal configuration error.  See the $PACKAGE docs for more information." 1>&2
-        exit 1
-        ;;
-      esac
+       # Make a backup of the uninstalled library when relinking
+       if test "$mode" = relink; then
+         $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $?
+       fi
 
-      # Create the output directory, or remove our outputs if we need to.
-      if test -d $objdir; then
-        $show "$rm $objdir/$output $objdir/$libname.* $objdir/${libname}${release}.*"
-        $run $rm $objdir/$output $objdir/$libname.* $objdir/${libname}${release}.*
-      else
-        $show "$mkdir $objdir"
-        $run $mkdir $objdir
-       status=$?
-       if test $status -eq 0 || test -d $objdir; then :
+       # Do each of the archive commands.
+       if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+         eval cmds=\"$archive_expsym_cmds\"
        else
-         exit $status
+         save_deplibs="$deplibs"
+         for conv in $convenience; do
+           tmp_deplibs=
+           for test_deplib in $deplibs; do
+             if test "$test_deplib" != "$conv"; then
+               tmp_deplibs="$tmp_deplibs $test_deplib"
+             fi
+           done
+           deplibs="$tmp_deplibs"
+         done
+         eval cmds=\"$archive_cmds\"
+         deplibs="$save_deplibs"
        fi
-      fi
-
-      # Check to see if the archive will have undefined symbols.
-      if test "$allow_undefined" = yes; then
-        if test "$allow_undefined_flag" = unsupported; then
-          $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2
-          build_libtool_libs=no
-         build_old_libs=yes
-        fi
-      else
-        # Don't allow undefined symbols.
-        allow_undefined_flag="$no_undefined_flag"
-      fi
-
-      # Add libc to deplibs on all systems.
-      dependency_libs="$deplibs"
-      deplibs="$deplibs -lc"
-
-      if test "$build_libtool_libs" = yes; then
-        # Get the real and link names of the library.
-        eval library_names=\"$library_names_spec\"
-        set dummy $library_names
-        realname="$2"
-        shift; shift
-
-        if test -n "$soname_spec"; then
-          eval soname=\"$soname_spec\"
-        else
-          soname="$realname"
-        fi
-
-        lib="$objdir/$realname"
-       for link
-       do
-         linknames="$linknames $link"
+       save_ifs="$IFS"; IFS='~'
+       for cmd in $cmds; do
+         IFS="$save_ifs"
+         $show "$cmd"
+         $run eval "$cmd" || exit $?
        done
+       IFS="$save_ifs"
 
-        # Use standard objects if they are PIC.
-        test -z "$pic_flag" && libobjs=`$echo "X$libobjs " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//g'`
-
-        # Do each of the archive commands.
-        eval cmds=\"$archive_cmds\"
-        IFS="${IFS=    }"; save_ifs="$IFS"; IFS=';'
-        for cmd in $cmds; do
-          IFS="$save_ifs"
-          $show "$cmd"
-          $run eval "$cmd" || exit $?
-        done
-        IFS="$save_ifs"
+       # Restore the uninstalled library and exit
+       if test "$mode" = relink; then
+         $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $?
+         exit 0
+       fi
 
-        # Create links to the real library.
-        for linkname in $linknames; do
-          $show "(cd $objdir && $LN_S $realname $linkname)"
-          $run eval '(cd $objdir && $LN_S $realname $linkname)' || exit $?
-        done
+       # Create links to the real library.
+       for linkname in $linknames; do
+         if test "$realname" != "$linkname"; then
+           $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)"
+           $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $?
+         fi
+       done
 
-        # If -export-dynamic was specified, set the dlname.
-        if test "$export_dynamic" = yes; then
-          # On all known operating systems, these are identical.
-          dlname="$soname"
-        fi
+       # If -module or -export-dynamic was specified, set the dlname.
+       if test "$module" = yes || test "$export_dynamic" = yes; then
+         # On all known operating systems, these are identical.
+         dlname="$soname"
+       fi
       fi
-
-      # Now set the variables for building old libraries.
-      oldlib="$objdir/$libname.a"
       ;;
 
-    *.lo | *.o)
-      if test -n "$link_against_libtool_libs"; then
-        $echo "$modename: error: cannot link libtool libraries into reloadable objects" 1>&2
-        exit 1
-      fi
-
+    obj)
       if test -n "$deplibs"; then
-        $echo "$modename: warning: \`-l' and \`-L' are ignored while creating objects" 1>&2
+       $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2
       fi
 
-      if test -n "$dlfiles$dlprefiles"; then
-        $echo "$modename: warning: \`-dlopen' is ignored while creating objects" 1>&2
-        # Nullify the symbol file.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
+      if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+       $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2
       fi
 
       if test -n "$rpath"; then
-        $echo "$modename: warning: \`-rpath' is ignored while creating objects" 1>&2
+       $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2
+      fi
+
+      if test -n "$xrpath"; then
+       $echo "$modename: warning: \`-R' is ignored for objects" 1>&2
       fi
 
       if test -n "$vinfo"; then
-        $echo "$modename: warning: \`-version-info' is ignored while creating objects" 1>&2
+       $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2
       fi
 
       if test -n "$release"; then
-        $echo "$modename: warning: \`-release' is ignored while creating objects" 1>&2
+       $echo "$modename: warning: \`-release' is ignored for objects" 1>&2
       fi
 
-      case "$output" in
+      case $output in
       *.lo)
-        if test -n "$objs"; then
-          $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2
-          exit 1
-        fi
-        libobj="$output"
-        obj=`$echo "X$output" | $Xsed -e 's/\.lo$/.o/'`
-        ;;
+       if test -n "$objs$old_deplibs"; then
+         $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2
+         exit 1
+       fi
+       libobj="$output"
+       obj=`$echo "X$output" | $Xsed -e "$lo2o"`
+       ;;
       *)
-        libobj=
-        obj="$output"
-        ;;
+       libobj=
+       obj="$output"
+       ;;
       esac
 
       # Delete the old objects.
       $run $rm $obj $libobj
 
+      # Objects from convenience libraries.  This assumes
+      # single-version convenience libraries.  Whenever we create
+      # different ones for PIC/non-PIC, we'll have to duplicate
+      # the extraction.
+      reload_conv_objs=
+      gentop=
+      # reload_cmds runs $LD directly, so let us get rid of
+      # -Wl from whole_archive_flag_spec
+      wl=
+
+      if test -n "$convenience"; then
+       if test -n "$whole_archive_flag_spec"; then
+         eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\"
+       else
+         gentop="$output_objdir/${obj}x"
+         $show "${rm}r $gentop"
+         $run ${rm}r "$gentop"
+         $show "mkdir $gentop"
+         $run mkdir "$gentop"
+         status=$?
+         if test $status -ne 0 && test ! -d "$gentop"; then
+           exit $status
+         fi
+         generated="$generated $gentop"
+
+         for xlib in $convenience; do
+           # Unpack each convenience archive into its own directory under $gentop.
+           case $xlib in
+           [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+           *) xabs=`pwd`"/$xlib" ;;
+           esac
+           xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+           xdir="$gentop/$xlib"
+
+           $show "${rm}r $xdir"
+           $run ${rm}r "$xdir"
+           $show "mkdir $xdir"
+           $run mkdir "$xdir"
+           status=$?
+           if test $status -ne 0 && test ! -d "$xdir"; then
+             exit $status
+           fi
+           $show "(cd $xdir && $AR x $xabs)"
+           $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+           # Append rather than overwrite, so objects from every archive are kept.
+           reload_conv_objs="$reload_conv_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP`
+         done
+       fi
+      fi
+
       # Create the old-style object.
-      reload_objs="$objs"`$echo "X$libobjs " | $Xsed -e 's/[^       ]*\.a //g' -e 's/\.lo /.o /g' -e 's/ $//g'`
+      reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
 
       output="$obj"
       eval cmds=\"$reload_cmds\"
-      IFS="${IFS=      }"; save_ifs="$IFS"; IFS=';'
+      save_ifs="$IFS"; IFS='~'
       for cmd in $cmds; do
-        IFS="$save_ifs"
-        $show "$cmd"
-        $run eval "$cmd" || exit $?
+       IFS="$save_ifs"
+       $show "$cmd"
+       $run eval "$cmd" || exit $?
       done
       IFS="$save_ifs"
 
       # Exit if we aren't doing a library object file.
-      test -z "$libobj" && exit 0
+      if test -z "$libobj"; then
+       if test -n "$gentop"; then
+         $show "${rm}r $gentop"
+         $run ${rm}r $gentop
+       fi
+
+       exit 0
+      fi
 
       if test "$build_libtool_libs" != yes; then
-        # Create an invalid libtool object if no PIC, so that we don't
-        # accidentally link it into a program.
-        $show "echo timestamp > $libobj"
-        $run eval "echo timestamp > $libobj" || exit $?
-        exit 0
-      fi
-
-      if test -n "$pic_flag"; then
-        # Only do commands if we really have different PIC objects.
-        reload_objs="$libobjs"
-        output="$libobj"
-        eval cmds=\"$reload_cmds\"
-        IFS="${IFS=    }"; save_ifs="$IFS"; IFS=';'
-        for cmd in $cmds; do
-          IFS="$save_ifs"
-          $show "$cmd"
-          $run eval "$cmd" || exit $?
-        done
-        IFS="$save_ifs"
+       if test -n "$gentop"; then
+         $show "${rm}r $gentop"
+         $run ${rm}r $gentop
+       fi
+
+       # Create an invalid libtool object if no PIC, so that we don't
+       # accidentally link it into a program.
+       $show "echo timestamp > $libobj"
+       $run eval "echo timestamp > $libobj" || exit $?
+       exit 0
+      fi
+
+      if test -n "$pic_flag" || test "$pic_mode" != default; then
+       # Only do commands if we really have different PIC objects.
+       reload_objs="$libobjs $reload_conv_objs"
+       output="$libobj"
+       eval cmds=\"$reload_cmds\"
+       save_ifs="$IFS"; IFS='~'
+       for cmd in $cmds; do
+         IFS="$save_ifs"
+         $show "$cmd"
+         $run eval "$cmd" || exit $?
+       done
+       IFS="$save_ifs"
       else
-        # Just create a symlink.
-        $show "$LN_S $obj $libobj"
-        $run $LN_S $obj $libobj || exit 1
+       # Just create a symlink.
+       $show $rm $libobj
+       $run $rm $libobj
+       xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'`
+       if test "X$xdir" = "X$libobj"; then
+         xdir="."
+       else
+         xdir="$xdir"
+       fi
+       baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'`
+       oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"`
+       $show "(cd $xdir && $LN_S $oldobj $baseobj)"
+       $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $?
+      fi
+
+      if test -n "$gentop"; then
+       $show "${rm}r $gentop"
+       $run ${rm}r $gentop
       fi
 
       exit 0
       ;;
 
-    *)
+    prog)
+      case $host in
+       *cygwin*) output=`echo $output | ${SED} -e 's,.exe$,,;s,$,.exe,'` ;;
+      esac
       if test -n "$vinfo"; then
-        $echo "$modename: warning: \`-version-info' is ignored while linking programs" 1>&2
+       $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2
       fi
 
       if test -n "$release"; then
-        $echo "$modename: warning: \`-release' is ignored while creating objects" 1>&2
+       $echo "$modename: warning: \`-release' is ignored for programs" 1>&2
       fi
 
-      if test -n "$rpath"; then
+      if test "$preload" = yes; then
+       if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown &&
+          test "$dlopen_self_static" = unknown; then
+         $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support."
+       fi
+      fi
+
+      case $host in
+      *-*-rhapsody* | *-*-darwin1.[012])
+       # On Rhapsody, replace the C library with the System framework
+       compile_deplibs=`$echo "X $compile_deplibs" | $Xsed -e 's/ -lc / -framework System /'`
+       finalize_deplibs=`$echo "X $finalize_deplibs" | $Xsed -e 's/ -lc / -framework System /'`
+       case $host in
+       *darwin*)
+         # Don't allow lazy linking, it breaks C++ global constructors
+         compile_command="$compile_command ${wl}-bind_at_load"
+         finalize_command="$finalize_command ${wl}-bind_at_load"
+         ;;
+       esac
+       ;;
+      esac
+
+      compile_command="$compile_command $compile_deplibs"
+      finalize_command="$finalize_command $finalize_deplibs"
+
+      if test -n "$rpath$xrpath"; then
        # If the user specified any rpath flags, then add them.
-       for libdir in $rpath; do
-          if test -n "$hardcode_libdir_flag_spec"; then
-            if test -n "$hardcode_libdir_separator"; then
-              if test -z "$hardcode_libdirs"; then
-                # Put the magic libdir with the hardcode flag.
-                hardcode_libdirs="$libdir"
-                libdir="@HARDCODE_LIBDIRS@"
-              else
-                # Just accumulate the unique libdirs.
-               case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
-               *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
-                 ;;
-               *)
-                 hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
-                 ;;
-               esac
-                libdir=
-              fi
-            fi
-
-            if test -n "$libdir"; then
-              eval flag=\"$hardcode_libdir_flag_spec\"
-
-              compile_command="$compile_command $flag"
-              finalize_command="$finalize_command $flag"
-            fi
-          elif test -n "$runpath_var"; then
-            case "$perm_rpath " in
-            *" $libdir "*) ;;
-            *) perm_rpath="$perm_rpath $libdir" ;;
-            esac
-          fi
+       for libdir in $rpath $xrpath; do
+         # This is the magic to use -rpath.
+         case "$finalize_rpath " in
+         *" $libdir "*) ;;
+         *) finalize_rpath="$finalize_rpath $libdir" ;;
+         esac
        done
       fi
 
-      # Substitute the hardcoded libdirs into the compile commands.
-      if test -n "$hardcode_libdir_separator"; then
-       compile_command=`$echo "X$compile_command" | $Xsed -e "s%@HARDCODE_LIBDIRS@%$hardcode_libdirs%g"`
-       finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@HARDCODE_LIBDIRS@%$hardcode_libdirs%g"`
+      # Now hardcode the library paths
+      rpath=
+      hardcode_libdirs=
+      for libdir in $compile_rpath $finalize_rpath; do
+       if test -n "$hardcode_libdir_flag_spec"; then
+         if test -n "$hardcode_libdir_separator"; then
+           if test -z "$hardcode_libdirs"; then
+             hardcode_libdirs="$libdir"
+           else
+             # Just accumulate the unique libdirs.
+             case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+             *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+               ;;
+             *)
+               hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+               ;;
+             esac
+           fi
+         else
+           eval flag=\"$hardcode_libdir_flag_spec\"
+           rpath="$rpath $flag"
+         fi
+       elif test -n "$runpath_var"; then
+         case "$perm_rpath " in
+         *" $libdir "*) ;;
+         *) perm_rpath="$perm_rpath $libdir" ;;
+         esac
+       fi
+       case $host in
+       *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+         case :$dllsearchpath: in
+         *":$libdir:"*) ;;
+         *) dllsearchpath="$dllsearchpath:$libdir";;
+         esac
+         ;;
+       esac
+      done
+      # Substitute the hardcoded libdirs into the rpath.
+      if test -n "$hardcode_libdir_separator" &&
+        test -n "$hardcode_libdirs"; then
+       libdir="$hardcode_libdirs"
+       eval rpath=\" $hardcode_libdir_flag_spec\"
       fi
-
-      if test -n "$libobjs" && test "$build_old_libs" = yes; then
-        # Transform all the library objects into standard objects.
-        compile_command=`$echo "X$compile_command " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'`
-        finalize_command=`$echo "X$finalize_command " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'`
+      compile_rpath="$rpath"
+
+      rpath=
+      hardcode_libdirs=
+      for libdir in $finalize_rpath; do
+       if test -n "$hardcode_libdir_flag_spec"; then
+         if test -n "$hardcode_libdir_separator"; then
+           if test -z "$hardcode_libdirs"; then
+             hardcode_libdirs="$libdir"
+           else
+             # Just accumulate the unique libdirs.
+             case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+             *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+               ;;
+             *)
+               hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+               ;;
+             esac
+           fi
+         else
+           eval flag=\"$hardcode_libdir_flag_spec\"
+           rpath="$rpath $flag"
+         fi
+       elif test -n "$runpath_var"; then
+         case "$finalize_perm_rpath " in
+         *" $libdir "*) ;;
+         *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;;
+         esac
+       fi
+      done
+      # Substitute the hardcoded libdirs into the rpath.
+      if test -n "$hardcode_libdir_separator" &&
+        test -n "$hardcode_libdirs"; then
+       libdir="$hardcode_libdirs"
+       eval rpath=\" $hardcode_libdir_flag_spec\"
       fi
+      finalize_rpath="$rpath"
 
-      if test "$export_dynamic" = yes && test -n "$NM" && test -n "$global_symbol_pipe"; then
-        dlsyms="${output}S.c"
-      else
-        dlsyms=
+      if test -n "$libobjs" && test "$build_old_libs" = yes; then
+       # Transform all the library objects into standard objects.
+       compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+       finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
       fi
 
-      if test -n "$dlsyms"; then
-        # Add our own program objects to the preloaded list.
-        dlprefiles=`$echo "X$objs$dlprefiles " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'`
-
-       # Discover the nlist of each of the dlfiles.
-        nlist="$objdir/${output}.nm"
-
-       if test -d $objdir; then
-         $show "$rm $nlist ${nlist}T"
-         $run $rm "$nlist" "${nlist}T"
+      dlsyms=
+      if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+       if test -n "$NM" && test -n "$global_symbol_pipe"; then
+         dlsyms="${outputname}S.c"
        else
-         $show "$mkdir $objdir"
-         $run $mkdir $objdir
-         status=$?
-         if test $status -eq 0 || test -d $objdir; then :
-         else
-           exit $status
-         fi
+         $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2
        fi
+      fi
 
-        for arg in $dlprefiles; do
-         $show "extracting global C symbols from \`$arg'"
-         $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
-        done
+      if test -n "$dlsyms"; then
+       case $dlsyms in
+       "") ;;
+       *.c)
+         # Discover the nlist of each of the dlfiles.
+         nlist="$output_objdir/${outputname}.nm"
 
-        # Parse the name list into a source file.
-        $show "creating $objdir/$dlsyms"
-        if test -z "$run"; then
-         # Make sure we at least have an empty file.
-         test -f "$nlist" || : > "$nlist"
+         $show "$rm $nlist ${nlist}S ${nlist}T"
+         $run $rm "$nlist" "${nlist}S" "${nlist}T"
 
-         # Try sorting and uniquifying the output.
-         if sort "$nlist" | uniq > "$nlist"T; then
-           mv -f "$nlist"T "$nlist"
-           wcout=`wc "$nlist" 2>/dev/null`
-           count=`echo "X$wcout" | $Xsed -e 's/^[      ]*\([0-9][0-9]*\).*$/\1/'`
-           (test "$count" -ge 0) 2>/dev/null || count=-1
-         else
-           $rm "$nlist"T
-           count=-1
-         fi
+         # Parse the name list into a source file.
+         $show "creating $output_objdir/$dlsyms"
 
-         case "$dlsyms" in
-         "") ;;
-         *.c)
-           $echo > "$objdir/$dlsyms" "\
-/* $dlsyms - symbol resolution table for \`$output' dlsym emulation. */
-/* Generated by $PROGRAM - GNU $PACKAGE $VERSION */
+         test -z "$run" && $echo > "$output_objdir/$dlsyms" "\
+/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */
+/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */
 
 #ifdef __cplusplus
 extern \"C\" {
 #endif
 
 /* Prevent the only kind of declaration conflicts we can make. */
-#define dld_preloaded_symbol_count some_other_symbol
-#define dld_preloaded_symbols some_other_symbol
+#define lt_preloaded_symbols some_other_symbol
 
 /* External symbol declarations for the compiler. */\
 "
 
-           if test -f "$nlist"; then
-             sed -e 's/^.* \(.*\)$/extern char \1;/' < "$nlist" >> "$objdir/$dlsyms"
+         if test "$dlself" = yes; then
+           $show "generating symbol list for \`$output'"
+
+           test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist"
+
+           # Add our own program objects to the symbol list.
+           progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+           for arg in $progfiles; do
+             $show "extracting global C symbols from \`$arg'"
+             $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+           done
+
+           if test -n "$exclude_expsyms"; then
+             $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+             $run eval '$mv "$nlist"T "$nlist"'
+           fi
+
+           if test -n "$export_symbols_regex"; then
+             $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+             $run eval '$mv "$nlist"T "$nlist"'
+           fi
+
+           # Prepare the list of exported symbols
+           if test -z "$export_symbols"; then
+             export_symbols="$output_objdir/$output.exp"
+             $run $rm $export_symbols
+             $run eval "${SED} -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+           else
+             $run eval "${SED} -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"'
+             $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T'
+             $run eval 'mv "$nlist"T "$nlist"'
+           fi
+         fi
+
+         for arg in $dlprefiles; do
+           $show "extracting global C symbols from \`$arg'"
+           name=`echo "$arg" | ${SED} -e 's%^.*/%%'`
+           $run eval 'echo ": $name " >> "$nlist"'
+           $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+         done
+
+         if test -z "$run"; then
+           # Make sure we have at least an empty file.
+           test -f "$nlist" || : > "$nlist"
+
+           if test -n "$exclude_expsyms"; then
+             egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+             $mv "$nlist"T "$nlist"
+           fi
+
+           # Try sorting and uniquifying the output.
+           if grep -v "^: " < "$nlist" |
+               if sort -k 3 </dev/null >/dev/null 2>&1; then
+                 sort -k 3
+               else
+                 sort +2
+               fi |
+               uniq > "$nlist"S; then
+             :
            else
-             echo '/* NONE */' >> "$objdir/$dlsyms"
+             grep -v "^: " < "$nlist" > "$nlist"S
            fi
 
-           $echo >> "$objdir/$dlsyms" "\
+           if test -f "$nlist"S; then
+             eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"'
+           else
+             echo '/* NONE */' >> "$output_objdir/$dlsyms"
+           fi
 
-#undef dld_preloaded_symbol_count
-#undef dld_preloaded_symbols
+           $echo >> "$output_objdir/$dlsyms" "\
+
+#undef lt_preloaded_symbols
 
 #if defined (__STDC__) && __STDC__
-# define __ptr_t void *
+# define lt_ptr void *
 #else
-# define __ptr_t char *
+# define lt_ptr char *
+# define const
 #endif
 
-/* The number of symbols in dld_preloaded_symbols, -1 if unsorted. */
-int dld_preloaded_symbol_count = $count;
-
 /* The mapping between symbol names and symbols. */
-struct {
-  char *name;
-  __ptr_t address;
+const struct {
+  const char *name;
+  lt_ptr address;
 }
-dld_preloaded_symbols[] =
+lt_preloaded_symbols[] =
 {\
 "
 
-           if test -f "$nlist"; then
-             sed 's/^\(.*\) \(.*\)$/  {"\1", (__ptr_t) \&\2},/' < "$nlist" >> "$objdir/$dlsyms"
-           fi
+           eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$dlsyms"
 
-           $echo >> "$objdir/$dlsyms" "\
-  {0, (__ptr_t) 0}
+           $echo >> "$output_objdir/$dlsyms" "\
+  {0, (lt_ptr) 0}
 };
 
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt_preloaded_symbols;
+}
+#endif
+
 #ifdef __cplusplus
 }
 #endif\
 "
-           ;;
+         fi
 
-         *)
-           $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2
-           exit 1
-           ;;
+         pic_flag_for_symtable=
+         case $host in
+         # compiling the symbol table file with pic_flag works around
+         # a FreeBSD bug that causes programs to crash when -lm is
+         # linked before any other PIC object.  But we must not use
+         # pic_flag when linking with -static.  The problem exists in
+         # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
+         *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+           case "$compile_command " in
+           *" -static "*) ;;
+           *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";;
+           esac;;
+         *-*-hpux*)
+           case "$compile_command " in
+           *" -static "*) ;;
+           *) pic_flag_for_symtable=" $pic_flag -DPIC";;
+           esac
          esac
-        fi
-
-        # Now compile the dynamic symbol file.
-        $show "(cd $objdir && $CC -c$no_builtin_flag \"$dlsyms\")"
-        $run eval '(cd $objdir && $CC -c$no_builtin_flag "$dlsyms")' || exit $?
 
-        # Transform the symbol file into the correct name.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$objdir/${output}S.o%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$objdir/${output}S.o%"`
-      elif test "$export_dynamic" != yes; then
-        test -n "$dlfiles$dlprefiles" && $echo "$modename: warning: \`-dlopen' and \`-dlpreopen' are ignored without \`-export-dynamic'" 1>&2
-      else
-        # We keep going just in case the user didn't refer to
-        # dld_preloaded_symbols.  The linker will fail if global_symbol_pipe
-        # really was required.
-        $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2
+         # Now compile the dynamic symbol file.
+         $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")"
+         $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $?
 
-        # Nullify the symbol file.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
-      fi
+         # Clean up the generated files.
+         $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T"
+         $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T"
 
-      if test -z "$link_against_libtool_libs" || test "$build_libtool_libs" != yes; then
-        # Replace the output file specification.
-        compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+         # Transform the symbol file into the correct name.
+         compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+         finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+         ;;
+       *)
+         $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2
+         exit 1
+         ;;
+       esac
+      else
+       # We keep going just in case the user didn't refer to
+       # lt_preloaded_symbols.  The linker will fail if global_symbol_pipe
+       # really was required.
 
-        # We have no uninstalled library dependencies, so finalize right now.
-        $show "$compile_command"
-        $run eval "$compile_command"
-        exit $?
+       # Nullify the symbol file.
+       compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
+       finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
       fi
 
-      # Replace the output file specification.
-      compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$objdir/$output"'%g'`
-      finalize_command=`$echo "X$finalize_command" | $Xsed -e 's%@OUTPUT@%'"$objdir/$output"'T%g'`
+      if test $need_relink = no || test "$build_libtool_libs" != yes; then
+       # Replace the output file specification.
+       compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+       link_command="$compile_command$compile_rpath"
 
-      # Create the binary in the object directory, then wrap it.
-      if test -d $objdir; then :
-      else
-        $show "$mkdir $objdir"
-       $run $mkdir $objdir
+       # We have no uninstalled library dependencies, so finalize right now.
+       $show "$link_command"
+       $run eval "$link_command"
        status=$?
-       if test $status -eq 0 || test -d $objdir; then :
-       else
-         exit $status
+
+       # Delete the generated files.
+       if test -n "$dlsyms"; then
+         $show "$rm $output_objdir/${outputname}S.${objext}"
+         $run $rm "$output_objdir/${outputname}S.${objext}"
        fi
+
+       exit $status
       fi
 
       if test -n "$shlibpath_var"; then
-        # We should set the shlibpath_var
-        rpath=
-        for dir in $temp_rpath; do
-          case "$dir" in
-          /* | [A-Za-z]:\\*)
-            # Absolute path.
-            rpath="$rpath$dir:"
-            ;;
-          *)
-            # Relative path: add a thisdir entry.
-            rpath="$rpath\$thisdir/$dir:"
-            ;;
-          esac
-        done
-        temp_rpath="$rpath"
-      fi
-
-      # Delete the old output file.
-      $run $rm $output
-
-      if test -n "$compile_shlibpath"; then
-        compile_command="$shlibpath_var=\"$compile_shlibpath\$$shlibpath_var\" $compile_command"
+       # We should set the shlibpath_var
+       rpath=
+       for dir in $temp_rpath; do
+         case $dir in
+         [\\/]* | [A-Za-z]:[\\/]*)
+           # Absolute path.
+           rpath="$rpath$dir:"
+           ;;
+         *)
+           # Relative path: add a thisdir entry.
+           rpath="$rpath\$thisdir/$dir:"
+           ;;
+         esac
+       done
+       temp_rpath="$rpath"
+      fi
+
+      if test -n "$compile_shlibpath$finalize_shlibpath"; then
+       compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
       fi
       if test -n "$finalize_shlibpath"; then
-        finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
+       finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
+      fi
+
+      compile_var=
+      finalize_var=
+      if test -n "$runpath_var"; then
+       if test -n "$perm_rpath"; then
+         # We should set the runpath_var.
+         rpath=
+         for dir in $perm_rpath; do
+           rpath="$rpath$dir:"
+         done
+         compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
+       fi
+       if test -n "$finalize_perm_rpath"; then
+         # We should set the runpath_var.
+         rpath=
+         for dir in $finalize_perm_rpath; do
+           rpath="$rpath$dir:"
+         done
+         finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
+       fi
       fi
 
-      if test -n "$runpath_var" && test -n "$perm_rpath"; then
-        # We should set the runpath_var.
-        rpath=
-        for dir in $perm_rpath; do
-          rpath="$rpath$dir:"
-        done
-        compile_command="$runpath_var=\"$rpath\$$runpath_var\" $compile_command"
-        finalize_command="$runpath_var=\"$rpath\$$runpath_var\" $finalize_command"
+      if test "$no_install" = yes; then
+       # We don't need to create a wrapper script.
+       link_command="$compile_var$compile_command$compile_rpath"
+       # Replace the output file specification.
+       link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+       # Delete the old output file.
+       $run $rm $output
+       # Link the executable and exit
+       $show "$link_command"
+       $run eval "$link_command" || exit $?
+       exit 0
       fi
 
-      case "$hardcode_action" in
-      relink)
-        # AGH! Flame the AIX and HP-UX people for me, will ya?
-        $echo "$modename: warning: using a buggy system linker" 1>&2
-        $echo "$modename: relinking will be required before \`$output' can be installed" 1>&2
-        ;;
-      esac
+      if test "$hardcode_action" = relink; then
+       # Fast installation is not supported
+       link_command="$compile_var$compile_command$compile_rpath"
+       relink_command="$finalize_var$finalize_command$finalize_rpath"
+
+       $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2
+       $echo "$modename: \`$output' will be relinked during installation" 1>&2
+      else
+       if test "$fast_install" != no; then
+         link_command="$finalize_var$compile_command$finalize_rpath"
+         if test "$fast_install" = yes; then
+           relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'`
+         else
+           # fast_install is set to needless
+           relink_command=
+         fi
+       else
+         link_command="$compile_var$compile_command$compile_rpath"
+         relink_command="$finalize_var$finalize_command$finalize_rpath"
+       fi
+      fi
+
+      # Replace the output file specification.
+      link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
 
-      $show "$compile_command"
-      $run eval "$compile_command" || exit $?
+      # Delete the old output files.
+      $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname
+
+      $show "$link_command"
+      $run eval "$link_command" || exit $?
 
       # Now create the wrapper script.
       $show "creating $output"
 
-      # Quote the finalize command for shipping.
-      finalize_command=`$echo "X$finalize_command" | $Xsed -e "$sed_quote_subst"`
+      # Quote the relink command for shipping.
+      if test -n "$relink_command"; then
+       # Preserve any variables that may affect compiler behavior
+       for var in $variables_saved_for_relink; do
+         if eval test -z \"\${$var+set}\"; then
+           relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command"
+         elif eval var_value=\$$var; test -z "$var_value"; then
+           relink_command="$var=; export $var; $relink_command"
+         else
+           var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"`
+           relink_command="$var=\"$var_value\"; export $var; $relink_command"
+         fi
+       done
+       relink_command="(cd `pwd`; $relink_command)"
+       relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+      fi
 
       # Quote $echo for shipping.
-      qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"`
+      if test "X$echo" = "X$SHELL $0 --fallback-echo"; then
+       case $0 in
+       [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";;
+       *) qecho="$SHELL `pwd`/$0 --fallback-echo";;
+       esac
+       qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"`
+      else
+       qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"`
+      fi
 
       # Only actually do things if our run command is non-null.
       if test -z "$run"; then
-        $rm $output
-        trap "$rm $output; exit 1" 1 2 15
+       # win32 will think the script is a binary if it has
+       # a .exe suffix, so we strip it off here.
+       case $output in
+         *.exe) output=`echo $output|${SED} 's,.exe$,,'` ;;
+       esac
+       # test for cygwin because mv fails w/o .exe extensions
+       case $host in
+         *cygwin*) exeext=.exe ;;
+         *) exeext= ;;
+       esac
+       $rm $output
+       trap "$rm $output; exit 1" 1 2 15
 
-        $echo > $output "\
-#! /bin/sh
+       $echo > $output "\
+#! $SHELL
 
-# $output - temporary wrapper script for $objdir/$output
-# Generated by ltmain.sh - GNU $PACKAGE $VERSION
+# $output - temporary wrapper script for $objdir/$outputname
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
 #
 # The $output program cannot be directly executed until all the libtool
 # libraries that it depends on are installed.
 #
-# This wrapper script should never be moved out of \``pwd`'.
+# This wrapper script should never be moved out of the build directory.
 # If it is, it will not operate correctly.
 
 # Sed substitution that helps us do robust quoting.  It backslashifies
 # metacharacters that are still active within double-quoted strings.
-Xsed='sed -e s/^X//'
+Xsed="${SED}"' -e 1s/^X//'
 sed_quote_subst='$sed_quote_subst'
 
 # The HP-UX ksh and POSIX shell print the target directory to stdout
 # if CDPATH is set.
-if test \"\${CDPATH+set}\" = set; then CDPATH=; export CDPATH; fi
+if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi
+
+relink_command=\"$relink_command\"
 
 # This environment variable determines our operation mode.
 if test \"\$libtool_install_magic\" = \"$magic\"; then
-  # install mode needs the following variables:
-  link_against_libtool_libs='$link_against_libtool_libs'
-  finalize_command=\"$finalize_command\"
+  # install mode needs the following variable:
+  notinst_deplibs='$notinst_deplibs'
 else
   # When we are sourced in execute mode, \$file and \$echo are already set.
-  if test \"\$libtool_execute_magic\" = \"$magic\"; then :
-  else
+  if test \"\$libtool_execute_magic\" != \"$magic\"; then
     echo=\"$qecho\"
     file=\"\$0\"
+    # Make sure echo works.
+    if test \"X\$1\" = X--no-reexec; then
+      # Discard the --no-reexec flag, and continue.
+      shift
+    elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then
+      # Yippee, \$echo works!
+      :
+    else
+      # Restart under the correct shell, and then maybe \$echo will work.
+      exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"}
+    fi
   fi\
 "
-        $echo >> $output "\
+       $echo >> $output "\
 
   # Find the directory that this script lives in.
   thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\`
   test \"x\$thisdir\" = \"x\$file\" && thisdir=.
 
   # Follow symbolic links until we get to the real thisdir.
-  file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\`
+  file=\`ls -ld \"\$file\" | ${SED} -n 's/.*-> //p'\`
   while test -n \"\$file\"; do
     destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\`
 
     # If there was a directory component, then change thisdir.
     if test \"x\$destdir\" != \"x\$file\"; then
       case \"\$destdir\" in
-      /* | [A-Za-z]:\\*) thisdir=\"\$destdir\" ;;
+      [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;;
       *) thisdir=\"\$thisdir/\$destdir\" ;;
       esac
     fi
 
     file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\`
-    file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\`
+    file=\`ls -ld \"\$thisdir/\$file\" | ${SED} -n 's/.*-> //p'\`
   done
 
   # Try to get the absolute directory name.
   absdir=\`cd \"\$thisdir\" && pwd\`
   test -n \"\$absdir\" && thisdir=\"\$absdir\"
+"
+
+       if test "$fast_install" = yes; then
+         echo >> $output "\
+  program=lt-'$outputname'$exeext
+  progdir=\"\$thisdir/$objdir\"
+
+  if test ! -f \"\$progdir/\$program\" || \\
+     { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\
+       test \"X\$file\" != \"X\$progdir/\$program\"; }; then
+
+    file=\"\$\$-\$program\"
 
+    if test ! -d \"\$progdir\"; then
+      $mkdir \"\$progdir\"
+    else
+      $rm \"\$progdir/\$file\"
+    fi"
+
+         echo >> $output "\
+
+    # relink executable if necessary
+    if test -n \"\$relink_command\"; then
+      if relink_command_output=\`eval \$relink_command 2>&1\`; then :
+      else
+       $echo \"\$relink_command_output\" >&2
+       $rm \"\$progdir/\$file\"
+       exit 1
+      fi
+    fi
+
+    $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
+    { $rm \"\$progdir/\$program\";
+      $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; }
+    $rm \"\$progdir/\$file\"
+  fi"
+       else
+         echo >> $output "\
+  program='$outputname'
   progdir=\"\$thisdir/$objdir\"
-  program='$output'
+"
+       fi
+
+       echo >> $output "\
 
   if test -f \"\$progdir/\$program\"; then"
 
-        # Export our shlibpath_var if we have one.
-        if test -n "$shlibpath_var" && test -n "$temp_rpath"; then
-          $echo >> $output "\
+       # Export our shlibpath_var if we have one.
+       if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+         $echo >> $output "\
     # Add our own library path to $shlibpath_var
     $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
 
     # Some systems cannot cope with colon-terminated $shlibpath_var
-    $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/:*\$//'\`
+    # The second colon is a workaround for a bug in BeOS R4 ${SED}
+    $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\`
 
     export $shlibpath_var
 "
-        fi
+       fi
+
+       # fixup the dll searchpath if we need to.
+       if test -n "$dllsearchpath"; then
+         $echo >> $output "\
+    # Add the dll search path components to the executable PATH
+    PATH=$dllsearchpath:\$PATH
+"
+       fi
 
-        $echo >> $output "\
+       $echo >> $output "\
     if test \"\$libtool_execute_magic\" != \"$magic\"; then
       # Run the actual program with our arguments.
+"
+       case $host in
+       # win32 systems need to use the prog path for dll
+       # lookup to work
+       *-*-cygwin* | *-*-pw32*)
+         $echo >> $output "\
+      exec \$progdir/\$program \${1+\"\$@\"}
+"
+         ;;
+
+       # Backslashes separate directories on plain windows
+       *-*-mingw | *-*-os2*)
+         $echo >> $output "\
+      exec \$progdir\\\\\$program \${1+\"\$@\"}
+"
+         ;;
 
+       *)
+         $echo >> $output "\
       # Export the path to the program.
       PATH=\"\$progdir:\$PATH\"
       export PATH
 
       exec \$program \${1+\"\$@\"}
-
+"
+         ;;
+       esac
+       $echo >> $output "\
       \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\"
       exit 1
     fi
@@ -1530,48 +3814,189 @@ else
   fi
 fi\
 "
-        chmod +x $output
+       chmod +x $output
       fi
       exit 0
       ;;
     esac
 
     # See if we need to build an old-fashioned archive.
-    if test "$build_old_libs" = "yes"; then
-      # Transform .lo files to .o files.
-      oldobjs="$objs"`$echo "X$libobjs " | $Xsed -e 's/[^   ]*\.a //g' -e 's/\.lo /.o /g' -e 's/ $//g'`
+    for oldlib in $oldlibs; do
+
+      if test "$build_libtool_libs" = convenience; then
+       oldobjs="$libobjs_save"
+       addlibs="$convenience"
+       build_libtool_libs=no
+      else
+       if test "$build_libtool_libs" = module; then
+         oldobjs="$libobjs_save"
+         build_libtool_libs=no
+       else
+         oldobjs="$objs$old_deplibs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`
+       fi
+       addlibs="$old_convenience"
+      fi
+
+      if test -n "$addlibs"; then
+       gentop="$output_objdir/${outputname}x"
+       $show "${rm}r $gentop"
+       $run ${rm}r "$gentop"
+       $show "mkdir $gentop"
+       $run mkdir "$gentop"
+       status=$?
+       if test $status -ne 0 && test ! -d "$gentop"; then
+         exit $status
+       fi
+       generated="$generated $gentop"
+
+       # Add in members from convenience archives.
+       for xlib in $addlibs; do
+         # Extract the objects.
+         case $xlib in
+         [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+         *) xabs=`pwd`"/$xlib" ;;
+         esac
+         xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+         xdir="$gentop/$xlib"
+
+         $show "${rm}r $xdir"
+         $run ${rm}r "$xdir"
+         $show "mkdir $xdir"
+         $run mkdir "$xdir"
+         status=$?
+         if test $status -ne 0 && test ! -d "$xdir"; then
+           exit $status
+         fi
+         $show "(cd $xdir && $AR x $xabs)"
+         $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+         oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP`
+       done
+      fi
 
       # Do each command in the archive commands.
       if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
        eval cmds=\"$old_archive_from_new_cmds\"
       else
+       # Ensure that we have .o objects in place in case we decided
+       # not to build a shared library, and have fallen back to building
+       # static libs even though --disable-static was passed!
+       for oldobj in $oldobjs; do
+         if test ! -f $oldobj; then
+           xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'`
+           if test "X$xdir" = "X$oldobj"; then
+             xdir="."
+           else
+             xdir="$xdir"
+           fi
+           baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'`
+           obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"`
+           $show "(cd $xdir && ${LN_S} $obj $baseobj)"
+           $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $?
+         fi
+       done
+
        eval cmds=\"$old_archive_cmds\"
       fi
-      IFS="${IFS=      }"; save_ifs="$IFS"; IFS=';'
+      save_ifs="$IFS"; IFS='~'
       for cmd in $cmds; do
-        IFS="$save_ifs"
-        $show "$cmd"
-        $run eval "$cmd" || exit $?
+       IFS="$save_ifs"
+       $show "$cmd"
+       $run eval "$cmd" || exit $?
       done
       IFS="$save_ifs"
+    done
+
+    if test -n "$generated"; then
+      $show "${rm}r$generated"
+      $run ${rm}r$generated
     fi
 
     # Now create the libtool archive.
-    case "$output" in
+    case $output in
     *.la)
       old_library=
-      test "$build_old_libs" = yes && old_library="$libname.a"
-
+      test "$build_old_libs" = yes && old_library="$libname.$libext"
       $show "creating $output"
 
+      # Preserve any variables that may affect compiler behavior
+      for var in $variables_saved_for_relink; do
+       if eval test -z \"\${$var+set}\"; then
+         relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command"
+       elif eval var_value=\$$var; test -z "$var_value"; then
+         relink_command="$var=; export $var; $relink_command"
+       else
+         var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"`
+         relink_command="$var=\"$var_value\"; export $var; $relink_command"
+       fi
+      done
+      # Quote the link command for shipping.
+      relink_command="(cd `pwd`; $SHELL $0 --mode=relink $libtool_args)"
+      relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+
       # Only create the output if not a dry run.
       if test -z "$run"; then
-        $echo > $output "\
-# $output - a libtool library file
-# Generated by ltmain.sh - GNU $PACKAGE $VERSION
+       for installed in no yes; do
+         if test "$installed" = yes; then
+           if test -z "$install_libdir"; then
+             break
+           fi
+           output="$output_objdir/$outputname"i
+           # Replace all uninstalled libtool libraries with the installed ones
+           newdependency_libs=
+           for deplib in $dependency_libs; do
+             case $deplib in
+             *.la)
+               name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'`
+               eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+               if test -z "$libdir"; then
+                 $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+                 exit 1
+               fi
+               newdependency_libs="$newdependency_libs $libdir/$name"
+               ;;
+             *) newdependency_libs="$newdependency_libs $deplib" ;;
+             esac
+           done
+           dependency_libs="$newdependency_libs"
+           newdlfiles=
+           for lib in $dlfiles; do
+             name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+             eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+             if test -z "$libdir"; then
+               $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+               exit 1
+             fi
+             newdlfiles="$newdlfiles $libdir/$name"
+           done
+           dlfiles="$newdlfiles"
+           newdlprefiles=
+           for lib in $dlprefiles; do
+             name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+             eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+             if test -z "$libdir"; then
+               $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+               exit 1
+             fi
+             newdlprefiles="$newdlprefiles $libdir/$name"
+           done
+           dlprefiles="$newdlprefiles"
+         fi
+         $rm $output
+         # place dlname in correct position for cygwin
+         tdlname=$dlname
+         case $host,$output,$installed,$module,$dlname in
+           *cygwin*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;;
+         esac
+         $echo > $output "\
+# $outputname - a libtool library file
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
 
 # The name that we can dlopen(3).
-dlname='$dlname'
+dlname='$tdlname'
 
 # Names of this library.
 library_names='$library_names'
@@ -1587,15 +4012,26 @@ current=$current
 age=$age
 revision=$revision
 
+# Is this an already installed library?
+installed=$installed
+
+# Files to dlopen/dlpreopen
+dlopen='$dlfiles'
+dlpreopen='$dlprefiles'
+
 # Directory that this library needs to be installed in:
-libdir='$install_libdir'\
-"
+libdir='$install_libdir'"
+         if test "$installed" = no && test $need_relink = yes; then
+           $echo >> $output "\
+relink_command=\"$relink_command\""
+         fi
+       done
       fi
 
       # Do a symbolic link so that the libtool archive can be found in
       # LD_LIBRARY_PATH before the program is installed.
-      $show "(cd $objdir && $LN_S ../$output $output)"
-      $run eval "(cd $objdir && $LN_S ../$output $output)" || exit 1
+      $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)"
+      $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $?
       ;;
     esac
     exit 0
@@ -1605,12 +4041,14 @@ libdir='$install_libdir'\
   install)
     modename="$modename: install"
 
-    # There may be an optional /bin/sh argument at the beginning of
+    # There may be an optional sh(1) argument at the beginning of
     # install_prog (especially on Windows NT).
-    if test "$nonopt" = "$SHELL"; then
+    if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh ||
+       # Allow the use of GNU shtool's install command.
+       $echo "X$nonopt" | $Xsed | grep shtool > /dev/null; then
       # Aesthetically quote it.
       arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"`
-      case "$arg" in
+      case $arg in
       *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \    ]*|*]*)
        arg="\"$arg\""
        ;;
@@ -1626,7 +4064,7 @@ libdir='$install_libdir'\
     # The real first argument should be the name of the installation program.
     # Aesthetically quote it.
     arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-    case "$arg" in
+    case $arg in
     *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \      ]*|*]*)
       arg="\"$arg\""
       ;;
@@ -1639,42 +4077,42 @@ libdir='$install_libdir'\
     opts=
     prev=
     install_type=
-    isdir=
+    isdir=no
     stripme=
     for arg
     do
       if test -n "$dest"; then
-        files="$files $dest"
-        dest="$arg"
-        continue
+       files="$files $dest"
+       dest="$arg"
+       continue
       fi
 
-      case "$arg" in
+      case $arg in
       -d) isdir=yes ;;
       -f) prev="-f" ;;
       -g) prev="-g" ;;
       -m) prev="-m" ;;
       -o) prev="-o" ;;
       -s)
-        stripme=" -s"
-        continue
-        ;;
+       stripme=" -s"
+       continue
+       ;;
       -*) ;;
 
       *)
-        # If the previous option needed an argument, then skip it.
-        if test -n "$prev"; then
-          prev=
-        else
-          dest="$arg"
-          continue
-        fi
-        ;;
+       # If the previous option needed an argument, then skip it.
+       if test -n "$prev"; then
+         prev=
+       else
+         dest="$arg"
+         continue
+       fi
+       ;;
       esac
 
       # Aesthetically quote the argument.
       arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-      case "$arg" in
+      case $arg in
       *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \    ]*|*]*)
        arg="\"$arg\""
        ;;
@@ -1696,9 +4134,9 @@ libdir='$install_libdir'\
 
     if test -z "$files"; then
       if test -z "$dest"; then
-        $echo "$modename: no file or destination specified" 1>&2
+       $echo "$modename: no file or destination specified" 1>&2
       else
-        $echo "$modename: you must specify a destination" 1>&2
+       $echo "$modename: you must specify a destination" 1>&2
       fi
       $echo "$help" 1>&2
       exit 1
@@ -1709,7 +4147,7 @@ libdir='$install_libdir'\
 
     # Check to see that the destination is a directory.
     test -d "$dest" && isdir=yes
-    if test -n "$isdir"; then
+    if test "$isdir" = yes; then
       destdir="$dest"
       destname=
     else
@@ -1720,23 +4158,23 @@ libdir='$install_libdir'\
       # Not a directory, so check to see that there is only one file specified.
       set dummy $files
       if test $# -gt 2; then
-        $echo "$modename: \`$dest' is not a directory" 1>&2
-        $echo "$help" 1>&2
-        exit 1
+       $echo "$modename: \`$dest' is not a directory" 1>&2
+       $echo "$help" 1>&2
+       exit 1
       fi
     fi
-    case "$destdir" in
-    /* | [A-Za-z]:\\*) ;;
+    case $destdir in
+    [\\/]* | [A-Za-z]:[\\/]*) ;;
     *)
       for file in $files; do
-        case "$file" in
-        *.lo) ;;
-        *)
-          $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-          ;;
-        esac
+       case $file in
+       *.lo) ;;
+       *)
+         $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2
+         $echo "$help" 1>&2
+         exit 1
+         ;;
+       esac
       done
       ;;
     esac
@@ -1751,210 +4189,266 @@ libdir='$install_libdir'\
     for file in $files; do
 
       # Do each installation.
-      case "$file" in
-      *.a)
-        # Do the static libraries later.
-        staticlibs="$staticlibs $file"
-        ;;
+      case $file in
+      *.$libext)
+       # Do the static libraries later.
+       staticlibs="$staticlibs $file"
+       ;;
 
       *.la)
-        # Check to see that this really is a libtool archive.
-        if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then :
-        else
-          $echo "$modename: \`$file' is not a valid libtool archive" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-        fi
-
-        library_names=
-        old_library=
-        # If there is no directory component, then add one.
-        case "$file" in
-        */* | *\\*) . $file ;;
-        *) . ./$file ;;
-        esac
-
-        # Add the libdir to current_libdirs if it is the destination.
-        if test "X$destdir" = "X$libdir"; then
-          case "$current_libdirs " in
-          *" $libdir "*) ;;
-          *) current_libdirs="$current_libdirs $libdir" ;;
-          esac
-        else
-          # Note the libdir as a future libdir.
-          case "$future_libdirs " in
-          *" $libdir "*) ;;
-          *) future_libdirs="$future_libdirs $libdir" ;;
-          esac
-        fi
-
-        dir="`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/"
-        test "X$dir" = "X$file/" && dir=
-        dir="$dir$objdir"
-
-        # See the names of the shared library.
-        set dummy $library_names
-        if test -n "$2"; then
-          realname="$2"
-          shift
-          shift
-
-          # Install the shared library and build the symlinks.
-          $show "$install_prog $dir/$realname $destdir/$realname"
-          $run eval "$install_prog $dir/$realname $destdir/$realname" || exit $?
-          test "X$dlname" = "X$realname" && dlname=
-
-          if test $# -gt 0; then
-            # Delete the old symlinks.
-            rmcmd="$rm"
-            for linkname
-            do
-              rmcmd="$rmcmd $destdir/$linkname"
-            done
-            $show "$rmcmd"
-            $run $rmcmd
-
-            # ... and create new ones.
-            for linkname
-            do
-              test "X$dlname" = "X$linkname" && dlname=
-              $show "(cd $destdir && $LN_S $realname $linkname)"
-              $run eval "(cd $destdir && $LN_S $realname $linkname)"
-            done
-          fi
-
-          if test -n "$dlname"; then
-            # Install the dynamically-loadable library.
-            $show "$install_prog $dir/$dlname $destdir/$dlname"
-            $run eval "$install_prog $dir/$dlname $destdir/$dlname" || exit $?
-          fi
-
-          # Do each command in the postinstall commands.
-          lib="$destdir/$realname"
-          eval cmds=\"$postinstall_cmds\"
-          IFS="${IFS=  }"; save_ifs="$IFS"; IFS=';'
-          for cmd in $cmds; do
-            IFS="$save_ifs"
-            $show "$cmd"
-            $run eval "$cmd" || exit $?
-          done
-          IFS="$save_ifs"
-        fi
-
-        # Install the pseudo-library for information purposes.
-        name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
-        $show "$install_prog $file $destdir/$name"
-        $run eval "$install_prog $file $destdir/$name" || exit $?
-
-        # Maybe install the static library, too.
-        test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library"
-        ;;
+       # Check to see that this really is a libtool archive.
+       if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+       else
+         $echo "$modename: \`$file' is not a valid libtool archive" 1>&2
+         $echo "$help" 1>&2
+         exit 1
+       fi
+
+       library_names=
+       old_library=
+       relink_command=
+       # If there is no directory component, then add one.
+       case $file in
+       */* | *\\*) . $file ;;
+       *) . ./$file ;;
+       esac
+
+       # Add the libdir to current_libdirs if it is the destination.
+       if test "X$destdir" = "X$libdir"; then
+         case "$current_libdirs " in
+         *" $libdir "*) ;;
+         *) current_libdirs="$current_libdirs $libdir" ;;
+         esac
+       else
+         # Note the libdir as a future libdir.
+         case "$future_libdirs " in
+         *" $libdir "*) ;;
+         *) future_libdirs="$future_libdirs $libdir" ;;
+         esac
+       fi
+
+       dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/
+       test "X$dir" = "X$file/" && dir=
+       dir="$dir$objdir"
+
+       if test -n "$relink_command"; then
+         $echo "$modename: warning: relinking \`$file'" 1>&2
+         $show "$relink_command"
+         if $run eval "$relink_command"; then :
+         else
+           $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+           continue
+         fi
+       fi
+
+       # See the names of the shared library.
+       set dummy $library_names
+       if test -n "$2"; then
+         realname="$2"
+         shift
+         shift
+
+         srcname="$realname"
+         test -n "$relink_command" && srcname="$realname"T
+
+         # Install the shared library and build the symlinks.
+         $show "$install_prog $dir/$srcname $destdir/$realname"
+         $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $?
+         if test -n "$stripme" && test -n "$striplib"; then
+           $show "$striplib $destdir/$realname"
+           $run eval "$striplib $destdir/$realname" || exit $?
+         fi
+
+         if test $# -gt 0; then
+           # Delete the old symlinks, and create new ones.
+           for linkname
+           do
+             if test "$linkname" != "$realname"; then
+               $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)"
+               $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)"
+             fi
+           done
+         fi
+
+         # Do each command in the postinstall commands.
+         lib="$destdir/$realname"
+         eval cmds=\"$postinstall_cmds\"
+         save_ifs="$IFS"; IFS='~'
+         for cmd in $cmds; do
+           IFS="$save_ifs"
+           $show "$cmd"
+           $run eval "$cmd" || exit $?
+         done
+         IFS="$save_ifs"
+       fi
+
+       # Install the pseudo-library for information purposes.
+       name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+       instname="$dir/$name"i
+       $show "$install_prog $instname $destdir/$name"
+       $run eval "$install_prog $instname $destdir/$name" || exit $?
+
+       # Maybe install the static library, too.
+       test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library"
+       ;;
 
       *.lo)
-        # Install (i.e. copy) a libtool object.
-
-        # Figure out destination file name, if it wasn't already specified.
-        if test -n "$destname"; then
-          destfile="$destdir/$destname"
-        else
-          destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
-          destfile="$destdir/$destfile"
-        fi
-
-        # Deduce the name of the destination old-style object file.
-        case "$destfile" in
-        *.lo)
-          staticdest=`$echo "X$destfile" | $Xsed -e 's/\.lo$/\.o/'`
-          ;;
-        *.o)
-          staticdest="$destfile"
-          destfile=
-          ;;
-        *)
-          $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-          ;;
-        esac
-
-        # Install the libtool object if requested.
-        if test -n "$destfile"; then
-          $show "$install_prog $file $destfile"
-          $run eval "$install_prog $file $destfile" || exit $?
-        fi
-
-        # Install the old object if enabled.
-        if test "$build_old_libs" = yes; then
-          # Deduce the name of the old-style object file.
-          staticobj=`$echo "X$file" | $Xsed -e 's/\.lo$/\.o/'`
-
-          $show "$install_prog $staticobj $staticdest"
-          $run eval "$install_prog \$staticobj \$staticdest" || exit $?
-        fi
-        exit 0
-        ;;
+       # Install (i.e. copy) a libtool object.
+
+       # Figure out destination file name, if it wasn't already specified.
+       if test -n "$destname"; then
+         destfile="$destdir/$destname"
+       else
+         destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+         destfile="$destdir/$destfile"
+       fi
+
+       # Deduce the name of the destination old-style object file.
+       case $destfile in
+       *.lo)
+         staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"`
+         ;;
+       *.$objext)
+         staticdest="$destfile"
+         destfile=
+         ;;
+       *)
+         $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2
+         $echo "$help" 1>&2
+         exit 1
+         ;;
+       esac
+
+       # Install the libtool object if requested.
+       if test -n "$destfile"; then
+         $show "$install_prog $file $destfile"
+         $run eval "$install_prog $file $destfile" || exit $?
+       fi
+
+       # Install the old object if enabled.
+       if test "$build_old_libs" = yes; then
+         # Deduce the name of the old-style object file.
+         staticobj=`$echo "X$file" | $Xsed -e "$lo2o"`
+
+         $show "$install_prog $staticobj $staticdest"
+         $run eval "$install_prog \$staticobj \$staticdest" || exit $?
+       fi
+       exit 0
+       ;;
 
       *)
-        # Do a test to see if this is really a libtool program.
-        if (sed -e '4q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then
-          link_against_libtool_libs=
-          finalize_command=
-
-          # If there is no directory component, then add one.
-          case "$file" in
-          */* | *\\*) . $file ;;
-          *) . ./$file ;;
-          esac
-
-          # Check the variables that should have been set.
-          if test -z "$link_against_libtool_libs" || test -z "$finalize_command"; then
-            $echo "$modename: invalid libtool wrapper script \`$file'" 1>&2
-            exit 1
-          fi
-
-          finalize=yes
-          for lib in $link_against_libtool_libs; do
-            # Check to see that each library is installed.
-            libdir=
-            if test -f "$lib"; then
-              # If there is no directory component, then add one.
-              case "$lib" in
-              */* | *\\*) . $lib ;;
-              *) . ./$lib ;;
-              esac
-            fi
-            libfile="$libdir/`$echo "X$lib" | $Xsed -e 's%^.*/%%g'`"
-            if test -z "$libdir"; then
-              $echo "$modename: warning: \`$lib' contains no -rpath information" 1>&2
-            elif test -f "$libfile"; then :
-            else
-              $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2
-              finalize=no
-            fi
-          done
-
-          if test "$hardcode_action" = relink; then
-            if test "$finalize" = yes; then
-              $echo "$modename: warning: relinking \`$file' on behalf of your buggy system linker" 1>&2
-              $show "$finalize_command"
-              if $run eval "$finalize_command"; then :
-              else
-                $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
-                continue
-              fi
-              file="$objdir/$file"T
-            else
-              $echo "$modename: warning: cannot relink \`$file' on behalf of your buggy system linker" 1>&2
-            fi
-          else
-            # Install the binary that we compiled earlier.
+       # Figure out destination file name, if it wasn't already specified.
+       if test -n "$destname"; then
+         destfile="$destdir/$destname"
+       else
+         destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+         destfile="$destdir/$destfile"
+       fi
+
+       # Do a test to see if this is really a libtool program.
+       case $host in
+       *cygwin*|*mingw*)
+           wrapper=`echo $file | ${SED} -e 's,.exe$,,'`
+           ;;
+       *)
+           wrapper=$file
+           ;;
+       esac
+       if (${SED} -e '4q' $wrapper | egrep "^# Generated by .*$PACKAGE")>/dev/null 2>&1; then
+         notinst_deplibs=
+         relink_command=
+
+         # If there is no directory component, then add one.
+         case $file in
+         */* | *\\*) . $wrapper ;;
+         *) . ./$wrapper ;;
+         esac
+
+         # Check the variables that should have been set.
+         if test -z "$notinst_deplibs"; then
+           $echo "$modename: invalid libtool wrapper script \`$wrapper'" 1>&2
+           exit 1
+         fi
+
+         finalize=yes
+         for lib in $notinst_deplibs; do
+           # Check to see that each library is installed.
+           libdir=
+           if test -f "$lib"; then
+             # If there is no directory component, then add one.
+             case $lib in
+             */* | *\\*) . $lib ;;
+             *) . ./$lib ;;
+             esac
+           fi
+           libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test
+           if test -n "$libdir" && test ! -f "$libfile"; then
+             $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2
+             finalize=no
+           fi
+         done
+
+         relink_command=
+         # If there is no directory component, then add one.
+         case $file in
+         */* | *\\*) . $wrapper ;;
+         *) . ./$wrapper ;;
+         esac
+
+         outputname=
+         if test "$fast_install" = no && test -n "$relink_command"; then
+           if test "$finalize" = yes && test -z "$run"; then
+             tmpdir="/tmp"
+             test -n "$TMPDIR" && tmpdir="$TMPDIR"
+             tmpdir="$tmpdir/libtool-$$"
+             if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then :
+             else
+               $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2
+               continue
+             fi
+             file=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+             outputname="$tmpdir/$file"
+             # Replace the output file specification.
+             relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'`
+
+             $show "$relink_command"
+             if $run eval "$relink_command"; then :
+             else
+               $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+               ${rm}r "$tmpdir"
+               continue
+             fi
+             file="$outputname"
+           else
+             $echo "$modename: warning: cannot relink \`$file'" 1>&2
+           fi
+         else
+           # Install the binary that we compiled earlier.
            file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"`
-          fi
-        fi
+         fi
+       fi
 
-        $show "$install_prog$stripme $file $dest"
-        $run eval "$install_prog\$stripme \$file \$dest" || exit $?
-        ;;
+       # remove .exe since cygwin /usr/bin/install will append another
+       # one anyways
+       case $install_prog,$host in
+       /usr/bin/install*,*cygwin*)
+         case $file:$destfile in
+         *.exe:*.exe)
+           # this is ok
+           ;;
+         *.exe:*)
+           destfile=$destfile.exe
+           ;;
+         *:*.exe)
+           destfile=`echo $destfile | ${SED} -e 's,.exe$,,'`
+           ;;
+         esac
+         ;;
+       esac
+       $show "$install_prog$stripme $file $destfile"
+       $run eval "$install_prog\$stripme \$file \$destfile" || exit $?
+       test -n "$outputname" && ${rm}r "$tmpdir"
+       ;;
       esac
     done
 
@@ -1967,13 +4461,18 @@ libdir='$install_libdir'\
       $show "$install_prog $file $oldlib"
       $run eval "$install_prog \$file \$oldlib" || exit $?
 
+      if test -n "$stripme" && test -n "$striplib"; then
+       $show "$old_striplib $oldlib"
+       $run eval "$old_striplib $oldlib" || exit $?
+      fi
+
       # Do each command in the postinstall commands.
       eval cmds=\"$old_postinstall_cmds\"
-      IFS="${IFS=      }"; save_ifs="$IFS"; IFS=';'
+      save_ifs="$IFS"; IFS='~'
       for cmd in $cmds; do
-        IFS="$save_ifs"
-        $show "$cmd"
-        $run eval "$cmd" || exit $?
+       IFS="$save_ifs"
+       $show "$cmd"
+       $run eval "$cmd" || exit $?
       done
       IFS="$save_ifs"
     done
@@ -1985,54 +4484,59 @@ libdir='$install_libdir'\
     if test -n "$current_libdirs"; then
       # Maybe just do a dry run.
       test -n "$run" && current_libdirs=" -n$current_libdirs"
-      exec $SHELL $0 --finish$current_libdirs
-      exit 1
+      exec_cmd='$SHELL $0 --finish$current_libdirs'
+    else
+      exit 0
     fi
-
-    exit 0
     ;;
 
   # libtool finish mode
   finish)
     modename="$modename: finish"
     libdirs="$nonopt"
+    admincmds=
 
     if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
       for dir
       do
-        libdirs="$libdirs $dir"
+       libdirs="$libdirs $dir"
       done
 
       for libdir in $libdirs; do
        if test -n "$finish_cmds"; then
          # Do each command in the finish commands.
          eval cmds=\"$finish_cmds\"
-          IFS="${IFS=  }"; save_ifs="$IFS"; IFS=';'
-          for cmd in $cmds; do
-            IFS="$save_ifs"
-            $show "$cmd"
-            $run eval "$cmd"
-          done
-          IFS="$save_ifs"
+         save_ifs="$IFS"; IFS='~'
+         for cmd in $cmds; do
+           IFS="$save_ifs"
+           $show "$cmd"
+           $run eval "$cmd" || admincmds="$admincmds
+       $cmd"
+         done
+         IFS="$save_ifs"
        fi
        if test -n "$finish_eval"; then
          # Do the single finish_eval.
          eval cmds=\"$finish_eval\"
-         $run eval "$cmds"
+         $run eval "$cmds" || admincmds="$admincmds
+       $cmds"
        fi
       done
     fi
 
-    echo "------------------------------------------------------------------------------"
+    # Exit here if they wanted silent mode.
+    test "$show" = ":" && exit 0
+
+    echo "----------------------------------------------------------------------"
     echo "Libraries have been installed in:"
     for libdir in $libdirs; do
       echo "   $libdir"
     done
     echo
-    echo "To link against installed libraries in a given directory, LIBDIR,"
-    echo "you must use the \`-LLIBDIR' flag during linking."
-    echo
-    echo " You will also need to do one of the following:"
+    echo "If you ever happen to want to link against installed libraries"
+    echo "in a given directory, LIBDIR, you must either use libtool, and"
+    echo "specify the full pathname of the library, or use the \`-LLIBDIR'"
+    echo "flag during linking and do at least one of the following:"
     if test -n "$shlibpath_var"; then
       echo "   - add LIBDIR to the \`$shlibpath_var' environment variable"
       echo "     during execution"
@@ -2047,13 +4551,16 @@ libdir='$install_libdir'\
 
       echo "   - use the \`$flag' linker flag"
     fi
+    if test -n "$admincmds"; then
+      echo "   - have your system administrator run these commands:$admincmds"
+    fi
     if test -f /etc/ld.so.conf; then
       echo "   - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
     fi
     echo
     echo "See any operating system documentation about shared libraries for"
     echo "more information, such as the ld(1) and ld.so(8) manual pages."
-    echo "------------------------------------------------------------------------------"
+    echo "----------------------------------------------------------------------"
     exit 0
     ;;
 
@@ -2071,32 +4578,31 @@ libdir='$install_libdir'\
 
     # Handle -dlopen flags immediately.
     for file in $execute_dlfiles; do
-      if test -f "$file"; then :
-      else
+      if test ! -f "$file"; then
        $echo "$modename: \`$file' is not a file" 1>&2
        $echo "$help" 1>&2
        exit 1
       fi
 
       dir=
-      case "$file" in
+      case $file in
       *.la)
-        # Check to see that this really is a libtool archive.
-        if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then :
-        else
-          $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-        fi
+       # Check to see that this really is a libtool archive.
+       if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+       else
+         $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+         $echo "$help" 1>&2
+         exit 1
+       fi
 
        # Read the libtool library.
        dlname=
        library_names=
 
-        # If there is no directory component, then add one.
-       case "$file" in
+       # If there is no directory component, then add one.
+       case $file in
        */* | *\\*) . $file ;;
-        *) . ./$file ;;
+       *) . ./$file ;;
        esac
 
        # Skip this library if it cannot be dlopened.
@@ -2125,7 +4631,7 @@ libdir='$install_libdir'\
 
       *)
        $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2
-        continue
+       continue
        ;;
       esac
 
@@ -2149,13 +4655,13 @@ libdir='$install_libdir'\
     args=
     for file
     do
-      case "$file" in
+      case $file in
       -*) ;;
       *)
-        # Do a test to see if this is really a libtool program.
-        if (sed -e '4q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then
+       # Do a test to see if this is really a libtool program.
+       if (${SED} -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
          # If there is no directory component, then add one.
-         case "$file" in
+         case $file in
          */* | *\\*) . $file ;;
          *) . ./$file ;;
          esac
@@ -2163,7 +4669,7 @@ libdir='$install_libdir'\
          # Transform arg to wrapped name.
          file="$progdir/$program"
        fi
-        ;;
+       ;;
       esac
       # Quote arguments (to preserve shell metacharacters).
       file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"`
@@ -2171,32 +4677,48 @@ libdir='$install_libdir'\
     done
 
     if test -z "$run"; then
-      # Export the shlibpath_var.
-      eval "export $shlibpath_var"
+      if test -n "$shlibpath_var"; then
+       # Export the shlibpath_var.
+       eval "export $shlibpath_var"
+      fi
 
-      # Now actually exec the command.
-      eval "exec \$cmd$args"
+      # Restore saved enviroment variables
+      if test "${save_LC_ALL+set}" = set; then
+       LC_ALL="$save_LC_ALL"; export LC_ALL
+      fi
+      if test "${save_LANG+set}" = set; then
+       LANG="$save_LANG"; export LANG
+      fi
 
-      $echo "$modename: cannot exec \$cmd$args"
-      exit 1
+      # Now prepare to actually exec the command.
+      exec_cmd="\$cmd$args"
     else
       # Display what would be done.
-      eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\""
-      $echo "export $shlibpath_var"
+      if test -n "$shlibpath_var"; then
+       eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\""
+       $echo "export $shlibpath_var"
+      fi
       $echo "$cmd$args"
       exit 0
     fi
     ;;
 
-  # libtool uninstall mode
-  uninstall)
-    modename="$modename: uninstall"
+  # libtool clean and uninstall mode
+  clean | uninstall)
+    modename="$modename: $mode"
     rm="$nonopt"
     files=
+    rmforce=
+    exit_status=0
+
+    # This variable tells wrapper scripts just to set variables rather
+    # than running their programs.
+    libtool_install_magic="$magic"
 
     for arg
     do
-      case "$arg" in
+      case $arg in
+      -f) rm="$rm $arg"; rmforce=yes ;;
       -*) rm="$rm $arg" ;;
       *) files="$files $arg" ;;
       esac
@@ -2208,74 +4730,123 @@ libdir='$install_libdir'\
       exit 1
     fi
 
+    rmdirs=
+
     for file in $files; do
       dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
-      test "X$dir" = "X$file" && dir=.
+      if test "X$dir" = "X$file"; then
+       dir=.
+       objdir="$objdir"
+      else
+       objdir="$dir/$objdir"
+      fi
       name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+      test $mode = uninstall && objdir="$dir"
+
+      # Remember objdir for removal later, being careful to avoid duplicates
+      if test $mode = clean; then
+       case " $rmdirs " in
+         *" $objdir "*) ;;
+         *) rmdirs="$rmdirs $objdir" ;;
+       esac
+      fi
+
+      # Don't error if the file doesn't exist and rm -f was used.
+      if (test -L "$file") >/dev/null 2>&1 \
+       || (test -h "$file") >/dev/null 2>&1 \
+       || test -f "$file"; then
+       :
+      elif test -d "$file"; then
+       exit_status=1
+       continue
+      elif test "$rmforce" = yes; then
+       continue
+      fi
 
       rmfiles="$file"
 
-      case "$name" in
+      case $name in
       *.la)
-        # Possibly a libtool archive, so verify it.
-        if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then
-          . $dir/$name
-
-          # Delete the libtool libraries and symlinks.
-          for n in $library_names; do
-            rmfiles="$rmfiles $dir/$n"
-            test "X$n" = "X$dlname" && dlname=
-          done
-          test -n "$dlname" && rmfiles="$rmfiles $dir/$dlname"
-          test -n "$old_library" && rmfiles="$rmfiles $dir/$old_library"
-
-         $show "$rm $rmfiles"
-         $run $rm $rmfiles
-
-         if test -n "$library_names"; then
-           # Do each command in the postuninstall commands.
-           eval cmds=\"$postuninstall_cmds\"
-           IFS="${IFS=         }"; save_ifs="$IFS"; IFS=';'
-           for cmd in $cmds; do
+       # Possibly a libtool archive, so verify it.
+       if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+         . $dir/$name
+
+         # Delete the libtool libraries and symlinks.
+         for n in $library_names; do
+           rmfiles="$rmfiles $objdir/$n"
+         done
+         test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library"
+         test $mode = clean && rmfiles="$rmfiles $objdir/$name $objdir/${name}i"
+
+         if test $mode = uninstall; then
+           if test -n "$library_names"; then
+             # Do each command in the postuninstall commands.
+             eval cmds=\"$postuninstall_cmds\"
+             save_ifs="$IFS"; IFS='~'
+             for cmd in $cmds; do
+               IFS="$save_ifs"
+               $show "$cmd"
+               $run eval "$cmd"
+               if test $? != 0 && test "$rmforce" != yes; then
+                 exit_status=1
+               fi
+             done
              IFS="$save_ifs"
-             $show "$cmd"
-             $run eval "$cmd"
-           done
-           IFS="$save_ifs"
-         fi
+           fi
 
-          if test -n "$old_library"; then
-           # Do each command in the old_postuninstall commands.
-           eval cmds=\"$old_postuninstall_cmds\"
-           IFS="${IFS=         }"; save_ifs="$IFS"; IFS=';'
-           for cmd in $cmds; do
+           if test -n "$old_library"; then
+             # Do each command in the old_postuninstall commands.
+             eval cmds=\"$old_postuninstall_cmds\"
+             save_ifs="$IFS"; IFS='~'
+             for cmd in $cmds; do
+               IFS="$save_ifs"
+               $show "$cmd"
+               $run eval "$cmd"
+               if test $? != 0 && test "$rmforce" != yes; then
+                 exit_status=1
+               fi
+             done
              IFS="$save_ifs"
-             $show "$cmd"
-             $run eval "$cmd"
-           done
-           IFS="$save_ifs"
+           fi
+           # FIXME: should reinstall the best remaining shared library.
          fi
-
-          # FIXME: should reinstall the best remaining shared library.
-        fi
-        ;;
+       fi
+       ;;
 
       *.lo)
-        if test "$build_old_libs" = yes; then
-          oldobj=`$echo "X$name" | $Xsed -e 's/\.lo$/\.o/'`
-          rmfiles="$rmfiles $dir/$oldobj"
-        fi
-       $show "$rm $rmfiles"
-       $run $rm $rmfiles
-        ;;
+       if test "$build_old_libs" = yes; then
+         oldobj=`$echo "X$name" | $Xsed -e "$lo2o"`
+         rmfiles="$rmfiles $dir/$oldobj"
+       fi
+       ;;
 
       *)
-       $show "$rm $rmfiles"
-       $run $rm $rmfiles
+       # Do a test to see if this is a libtool program.
+       if test $mode = clean &&
+          (${SED} -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+         relink_command=
+         . $dir/$file
+
+         rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}"
+         if test "$fast_install" = yes && test -n "$relink_command"; then
+           rmfiles="$rmfiles $objdir/lt-$name"
+         fi
+       fi
        ;;
       esac
+      $show "$rm $rmfiles"
+      $run $rm $rmfiles || exit_status=1
     done
-    exit 0
+
+    # Try to remove the ${objdir}s in the directories where we deleted files
+    for dir in $rmdirs; do
+      if test -d "$dir"; then
+       $show "rmdir $dir"
+       $run rmdir $dir >/dev/null 2>&1
+      fi
+    done
+
+    exit $exit_status
     ;;
 
   "")
@@ -2285,20 +4856,29 @@ libdir='$install_libdir'\
     ;;
   esac
 
-  $echo "$modename: invalid operation mode \`$mode'" 1>&2
-  $echo "$generic_help" 1>&2
-  exit 1
+  if test -z "$exec_cmd"; then
+    $echo "$modename: invalid operation mode \`$mode'" 1>&2
+    $echo "$generic_help" 1>&2
+    exit 1
+  fi
 fi # test -z "$show_help"
 
+if test -n "$exec_cmd"; then
+  eval exec $exec_cmd
+  exit 1
+fi
+
 # We need to display help for each of the modes.
-case "$mode" in
+case $mode in
 "") $echo \
 "Usage: $modename [OPTION]... [MODE-ARG]...
 
 Provide generalized library-building support services.
 
+    --config          show all configuration variables
+    --debug           enable verbose shell tracing
 -n, --dry-run         display commands without modifying any files
-    --features        display configuration information and exit
+    --features        display basic configuration information and exit
     --finish          same as \`--mode=finish'
     --help            display this help message and exit
     --mode=MODE       use operation mode MODE [default=inferred from MODE-ARGS]
@@ -2308,6 +4888,7 @@ Provide generalized library-building support services.
 
 MODE must be one of the following:
 
+      clean           remove files from the build directory
       compile         compile a source file into a libtool object
       execute         automatically set library path, then run a program
       finish          complete the installation of libtool libraries
@@ -2320,12 +4901,33 @@ a more detailed description of MODE."
   exit 0
   ;;
 
+clean)
+  $echo \
+"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
+
+Remove files from the build directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm').  RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, object or program, all the files associated
+with it are deleted. Otherwise, only FILE itself is deleted using RM."
+  ;;
+
 compile)
   $echo \
 "Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
 
 Compile a source file into a libtool library object.
 
+This mode accepts the following additional options:
+
+  -o OUTPUT-FILE    set the output file name to OUTPUT-FILE
+  -prefer-pic       try to building PIC objects only
+  -prefer-non-pic   try to building non-PIC objects only
+  -static           always build a \`.o' file suitable for static linking
+
 COMPILE-COMMAND is a command to be used in creating a \`standard' object file
 from the given SOURCEFILE.
 
@@ -2392,18 +4994,27 @@ a program from several object files.
 The following components of LINK-COMMAND are treated specially:
 
   -all-static       do not do any dynamic linking at all
+  -avoid-version    do not add a version suffix if possible
   -dlopen FILE      \`-dlpreopen' FILE if it cannot be dlopened at runtime
-  -dlpreopen FILE   link in FILE and add its symbols to dld_preloaded_symbols
+  -dlpreopen FILE   link in FILE and add its symbols to lt_preloaded_symbols
   -export-dynamic   allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
+  -export-symbols SYMFILE
+                   try to export only the symbols listed in SYMFILE
+  -export-symbols-regex REGEX
+                   try to export only the symbols matching REGEX
   -LLIBDIR          search LIBDIR for required installed libraries
   -lNAME            OUTPUT-FILE requires the installed library libNAME
+  -module           build a library that can be dlopened
+  -no-fast-install  disable the fast-install mode
+  -no-install       link a not-installable executable
   -no-undefined     declare that a library does not refer to external symbols
   -o OUTPUT-FILE    create OUTPUT-FILE from the specified objects
   -release RELEASE  specify package release information
   -rpath LIBDIR     the created library will eventually be installed in LIBDIR
+  -R[ ]LIBDIR       add LIBDIR to the runtime path of programs and libraries
   -static           do not do any dynamic linking of libtool libraries
   -version-info CURRENT[:REVISION[:AGE]]
-                    specify library version info [each variable defaults to 0]
+                   specify library version info [each variable defaults to 0]
 
 All other options (arguments beginning with \`-') are ignored.
 
@@ -2411,18 +5022,19 @@ Every other argument is treated as a filename.  Files ending in \`.la' are
 treated as uninstalled libtool libraries, other files are standard or library
 object files.
 
-If the OUTPUT-FILE ends in \`.la', then a libtool library is created, only
-library objects (\`.lo' files) may be specified, and \`-rpath' is required.
+If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
+only library objects (\`.lo' files) may be specified, and \`-rpath' is
+required, except when creating a convenience library.
 
-If OUTPUT-FILE ends in \`.a', then a standard library is created using \`ar'
-and \`ranlib'.
+If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
+using \`ar' and \`ranlib', or on Windows using \`lib'.
 
-If OUTPUT-FILE ends in \`.lo' or \`.o', then a reloadable object file is
-created, otherwise an executable program is created."
+If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
+is created, otherwise an executable program is created."
   ;;
 
 uninstall)
-  $echo
+  $echo \
 "Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
 
 Remove libraries from an installation directory.
diff --git a/makecfg.c b/makecfg.c
new file mode 100644 (file)
index 0000000..787ea37
--- /dev/null
+++ b/makecfg.c
@@ -0,0 +1,300 @@
+/*
+ * makecfg.c
+ *
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ * Last Modified : March 23, 2005
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifndef offsetof               /* defined in <stddef.h> */
+#define offsetof(type, mem) ((size_t) \
+               ((char *)&((type *)0)->mem - (char *)(type *)0))
+#endif
+/*
+ * Print the "macros for structure access" section to stdout.
+ *
+ * For each structure member listed below, one "%define NAME(b) ((b) + N)"
+ * line is written, where N is the member's byte offset as computed by
+ * offsetof() when this program is compiled.  The members covered belong to
+ * struct jpeg_compress_struct, struct jpeg_decompress_struct and
+ * jpeg_component_info.  The trailing "; ..." text on each emitted line names
+ * the C expression the macro corresponds to.
+ *
+ * "%%" in the format strings prints a single literal '%'.  offsetof()
+ * yields a size_t, which is cast to unsigned to match the %u conversion.
+ */
+void
+print_structure_offset (void)
+{
+  printf("\n");
+  printf("; ---- macros for structure access -----------------------------------------\n");
+  printf("\n");
+
+  printf("; struct jpeg_compress_struct\n\n");
+  printf("%%define jcstruct_image_width(b)         ((b) + %3u) ; cinfo->image_width\n",
+       (unsigned)offsetof(struct jpeg_compress_struct, image_width));
+  printf("%%define jcstruct_max_v_samp_factor(b)   ((b) + %3u) ; cinfo->max_v_samp_factor\n",
+       (unsigned)offsetof(struct jpeg_compress_struct, max_v_samp_factor));
+  printf("\n");
+
+  printf("; struct jpeg_decompress_struct\n\n");
+  printf("%%define jdstruct_output_width(b)        ((b) + %3u) ; cinfo->output_width\n",
+       (unsigned)offsetof(struct jpeg_decompress_struct, output_width));
+  printf("%%define jdstruct_max_v_samp_factor(b)   ((b) + %3u) ; cinfo->max_v_samp_factor\n",
+       (unsigned)offsetof(struct jpeg_decompress_struct, max_v_samp_factor));
+  printf("%%define jdstruct_sample_range_limit(b)  ((b) + %3u) ; cinfo->sample_range_limit\n",
+       (unsigned)offsetof(struct jpeg_decompress_struct, sample_range_limit));
+  printf("\n");
+
+  printf("; jpeg_component_info\n\n");
+  printf("%%define jcompinfo_v_samp_factor(b)      ((b) + %2u) ; compptr->v_samp_factor\n",
+       (unsigned)offsetof(jpeg_component_info, v_samp_factor));
+  printf("%%define jcompinfo_width_in_blocks(b)    ((b) + %2u) ; compptr->width_in_blocks\n",
+       (unsigned)offsetof(jpeg_component_info, width_in_blocks));
+  printf("%%define jcompinfo_downsampled_width(b)  ((b) + %2u) ; compptr->downsampled_width\n",
+       (unsigned)offsetof(jpeg_component_info, downsampled_width));
+  printf("%%define jcompinfo_dct_table(b)          ((b) + %2u) ; compptr->dct_table\n",
+       (unsigned)offsetof(jpeg_component_info, dct_table));
+  printf("\n");
+}
+
+/*
+ * Print the "macros from jconfig.h" section to stdout.
+ *
+ * The choice is made by the C preprocessor when this program is built:
+ * if NEED_SHORT_EXTERNAL_NAMES is defined, a "%define" line for it is
+ * written; otherwise a "%undef" line is written.  Only this one symbol is
+ * mirrored here.
+ */
+void
+print_jconfig_h_macro (void)
+{
+  printf("\n");
+  printf("; ---- macros from jconfig.h -----------------------------------------------\n");
+  printf("\n");
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+  printf("%%define NEED_SHORT_EXTERNAL_NAMES\t; Use short forms of external names\n");
+#else
+  printf("%%undef NEED_SHORT_EXTERNAL_NAMES\t; Use short forms of external names\n");
+#endif
+  printf("\n");
+}
+
+/*
+ * Print the "macros from jmorecfg.h" section to stdout.
+ *
+ * For every capability option and SIMD support option tested below, a
+ * "%define NAME" line is written when NAME is defined while this program is
+ * compiled, and a "%undef NAME" line otherwise.  The groups are, in order:
+ * DCT methods, decoder capabilities, RGB scanline layout, encoder SIMD
+ * options and decoder SIMD options.  RGB_RED, RGB_GREEN, RGB_BLUE and
+ * RGB_PIXELSIZE are written as numeric "%define"s rather than as on/off
+ * switches.
+ *
+ * The #if/#error guard stops the build when RGB_PIXELSIZE is 3 or 4 and a
+ * channel offset is negative, not smaller than RGB_PIXELSIZE, or equal to
+ * another channel's offset.  In those configurations the offsets printed
+ * with %u are therefore known to be non-negative.
+ *
+ * The number of "\t" escapes differs from line to line; it is part of the
+ * emitted text and should be left as is.
+ */
+void
+print_jmorecfg_h_macro (void)
+{
+  printf("\n");
+  printf("; ---- macros from jmorecfg.h ----------------------------------------------\n");
+  printf("\n");
+
+  printf("; Capability options common to encoder and decoder:\n");
+  printf("\n");
+#ifdef DCT_ISLOW_SUPPORTED
+  printf("%%define DCT_ISLOW_SUPPORTED\t; slow but accurate integer algorithm\n");
+#else
+  printf("%%undef DCT_ISLOW_SUPPORTED\t; slow but accurate integer algorithm\n");
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  printf("%%define DCT_IFAST_SUPPORTED\t; faster, less accurate integer method\n");
+#else
+  printf("%%undef DCT_IFAST_SUPPORTED\t; faster, less accurate integer method\n");
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  printf("%%define DCT_FLOAT_SUPPORTED\t; floating-point: accurate, fast on fast HW\n");
+#else
+  printf("%%undef DCT_FLOAT_SUPPORTED\t; floating-point: accurate, fast on fast HW\n");
+#endif
+  printf("\n");
+
+  printf("; Decoder capability options:\n");
+  printf("\n");
+#ifdef IDCT_SCALING_SUPPORTED
+  printf("%%define IDCT_SCALING_SUPPORTED\t\t; Output rescaling via IDCT?\n");
+#else
+  printf("%%undef IDCT_SCALING_SUPPORTED\t\t; Output rescaling via IDCT?\n");
+#endif
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  printf("%%define UPSAMPLE_MERGING_SUPPORTED\t; Fast path for sloppy upsampling?\n");
+#else
+  printf("%%undef UPSAMPLE_MERGING_SUPPORTED\t; Fast path for sloppy upsampling?\n");
+#endif
+#ifdef UPSAMPLE_H1V2_SUPPORTED
+  printf("%%define UPSAMPLE_H1V2_SUPPORTED\t\t; Fast/fancy processing for 1h2v?\n");
+#else
+  printf("%%undef UPSAMPLE_H1V2_SUPPORTED\t\t; Fast/fancy processing for 1h2v?\n");
+#endif
+  printf("\n");
+
+#if (RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4) && \
+    (RGB_RED < 0 || RGB_RED >= RGB_PIXELSIZE || RGB_GREEN < 0 || \
+     RGB_GREEN >= RGB_PIXELSIZE || RGB_BLUE < 0 || RGB_BLUE >= RGB_PIXELSIZE || \
+     RGB_RED == RGB_GREEN || RGB_GREEN == RGB_BLUE || RGB_RED == RGB_BLUE)
+#error "Incorrect RGB pixel offset."
+#endif
+  printf("; Ordering of RGB data in scanlines passed to or from the application.\n");
+  printf("\n");
+  printf("%%define RGB_RED\t\t%u\t; Offset of Red in an RGB scanline element\n", RGB_RED);
+  printf("%%define RGB_GREEN\t%u\t; Offset of Green\n", RGB_GREEN);
+  printf("%%define RGB_BLUE\t%u\t; Offset of Blue\n", RGB_BLUE);
+  printf("%%define RGB_PIXELSIZE\t%u\t; JSAMPLEs per RGB scanline element\n", RGB_PIXELSIZE);
+  printf("\n");
+#ifdef RGBX_FILLER_0XFF
+  printf("%%define RGBX_FILLER_0XFF\t; fill dummy bytes with 0xFF in RGBX format\n");
+#else
+  printf("%%undef RGBX_FILLER_0XFF\t\t; fill dummy bytes with 0xFF in RGBX format\n");
+#endif
+  printf("\n");
+
+  printf("; SIMD support options (encoder):\n");
+  printf("\n");
+#ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED
+  printf("%%define JCCOLOR_RGBYCC_MMX_SUPPORTED\t; RGB->YCC conversion with MMX\n");
+#else
+  printf("%%undef JCCOLOR_RGBYCC_MMX_SUPPORTED\t; RGB->YCC conversion with MMX\n");
+#endif
+#ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED
+  printf("%%define JCCOLOR_RGBYCC_SSE2_SUPPORTED\t; RGB->YCC conversion with SSE2\n");
+#else
+  printf("%%undef JCCOLOR_RGBYCC_SSE2_SUPPORTED\t; RGB->YCC conversion with SSE2\n");
+#endif
+#ifdef JCSAMPLE_MMX_SUPPORTED
+  printf("%%define JCSAMPLE_MMX_SUPPORTED\t\t; downsampling with MMX\n");
+#else
+  printf("%%undef JCSAMPLE_MMX_SUPPORTED\t\t; downsampling with MMX\n");
+#endif
+#ifdef JCSAMPLE_SSE2_SUPPORTED
+  printf("%%define JCSAMPLE_SSE2_SUPPORTED\t\t; downsampling with SSE2\n");
+#else
+  printf("%%undef JCSAMPLE_SSE2_SUPPORTED\t\t; downsampling with SSE2\n");
+#endif
+#ifdef JFDCT_INT_MMX_SUPPORTED
+  printf("%%define JFDCT_INT_MMX_SUPPORTED\t\t; forward DCT with MMX\n");
+#else
+  printf("%%undef JFDCT_INT_MMX_SUPPORTED\t\t; forward DCT with MMX\n");
+#endif
+#ifdef JFDCT_INT_SSE2_SUPPORTED
+  printf("%%define JFDCT_INT_SSE2_SUPPORTED\t; forward DCT with SSE2\n");
+#else
+  printf("%%undef JFDCT_INT_SSE2_SUPPORTED\t\t; forward DCT with SSE2\n");
+#endif
+#ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED
+  printf("%%define JFDCT_FLT_3DNOW_MMX_SUPPORTED\t; forward DCT with 3DNow!/MMX\n");
+#else
+  printf("%%undef JFDCT_FLT_3DNOW_MMX_SUPPORTED\t; forward DCT with 3DNow!/MMX\n");
+#endif
+#ifdef JFDCT_FLT_SSE_MMX_SUPPORTED
+  printf("%%define JFDCT_FLT_SSE_MMX_SUPPORTED\t; forward DCT with SSE/MMX\n");
+#else
+  printf("%%undef JFDCT_FLT_SSE_MMX_SUPPORTED\t; forward DCT with SSE/MMX\n");
+#endif
+#ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED
+  printf("%%define JFDCT_FLT_SSE_SSE2_SUPPORTED\t; forward DCT with SSE/SSE2\n");
+#else
+  printf("%%undef JFDCT_FLT_SSE_SSE2_SUPPORTED\t; forward DCT with SSE/SSE2\n");
+#endif
+#ifdef JFDCT_INT_QUANTIZE_WITH_DIVISION
+  printf("%%define JFDCT_INT_QUANTIZE_WITH_DIVISION ; Use general quantization method\n");
+#else
+  printf("%%undef JFDCT_INT_QUANTIZE_WITH_DIVISION ; Use general quantization method\n");
+#endif
+  printf("\n");
+
+  printf("; SIMD support options (decoder):\n");
+  printf("\n");
+#ifdef JDCOLOR_YCCRGB_MMX_SUPPORTED
+  printf("%%define JDCOLOR_YCCRGB_MMX_SUPPORTED\t; YCC->RGB conversion with MMX\n");
+#else
+  printf("%%undef JDCOLOR_YCCRGB_MMX_SUPPORTED\t; YCC->RGB conversion with MMX\n");
+#endif
+#ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED
+  printf("%%define JDCOLOR_YCCRGB_SSE2_SUPPORTED\t; YCC->RGB conversion with SSE2\n");
+#else
+  printf("%%undef JDCOLOR_YCCRGB_SSE2_SUPPORTED\t; YCC->RGB conversion with SSE2\n");
+#endif
+#ifdef JDMERGE_MMX_SUPPORTED
+  printf("%%define JDMERGE_MMX_SUPPORTED\t\t; merged upsampling with MMX\n");
+#else
+  printf("%%undef JDMERGE_MMX_SUPPORTED\t\t; merged upsampling with MMX\n");
+#endif
+#ifdef JDMERGE_SSE2_SUPPORTED
+  printf("%%define JDMERGE_SSE2_SUPPORTED\t\t; merged upsampling with SSE2\n");
+#else
+  printf("%%undef JDMERGE_SSE2_SUPPORTED\t\t; merged upsampling with SSE2\n");
+#endif
+#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED
+  printf("%%define JDSAMPLE_FANCY_MMX_SUPPORTED\t; fancy upsampling with MMX\n");
+#else
+  printf("%%undef JDSAMPLE_FANCY_MMX_SUPPORTED\t; fancy upsampling with MMX\n");
+#endif
+#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED
+  printf("%%define JDSAMPLE_FANCY_SSE2_SUPPORTED\t; fancy upsampling with SSE2\n");
+#else
+  printf("%%undef JDSAMPLE_FANCY_SSE2_SUPPORTED\t; fancy upsampling with SSE2\n");
+#endif
+#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED
+  printf("%%define JDSAMPLE_SIMPLE_MMX_SUPPORTED\t; sloppy upsampling with MMX\n");
+#else
+  printf("%%undef JDSAMPLE_SIMPLE_MMX_SUPPORTED\t; sloppy upsampling with MMX\n");
+#endif
+#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED
+  printf("%%define JDSAMPLE_SIMPLE_SSE2_SUPPORTED\t; sloppy upsampling with SSE2\n");
+#else
+  printf("%%undef JDSAMPLE_SIMPLE_SSE2_SUPPORTED\t; sloppy upsampling with SSE2\n");
+#endif
+#ifdef JIDCT_INT_MMX_SUPPORTED
+  printf("%%define JIDCT_INT_MMX_SUPPORTED\t\t; inverse DCT with MMX\n");
+#else
+  printf("%%undef JIDCT_INT_MMX_SUPPORTED\t\t; inverse DCT with MMX\n");
+#endif
+#ifdef JIDCT_INT_SSE2_SUPPORTED
+  printf("%%define JIDCT_INT_SSE2_SUPPORTED\t; inverse DCT with SSE2\n");
+#else
+  printf("%%undef JIDCT_INT_SSE2_SUPPORTED\t\t; inverse DCT with SSE2\n");
+#endif
+#ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED
+  printf("%%define JIDCT_FLT_3DNOW_MMX_SUPPORTED\t; inverse DCT with 3DNow!/MMX\n");
+#else
+  printf("%%undef JIDCT_FLT_3DNOW_MMX_SUPPORTED\t; inverse DCT with 3DNow!/MMX\n");
+#endif
+#ifdef JIDCT_FLT_SSE_MMX_SUPPORTED
+  printf("%%define JIDCT_FLT_SSE_MMX_SUPPORTED\t; inverse DCT with SSE/MMX\n");
+#else
+  printf("%%undef JIDCT_FLT_SSE_MMX_SUPPORTED\t; inverse DCT with SSE/MMX\n");
+#endif
+#ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED
+  printf("%%define JIDCT_FLT_SSE_SSE2_SUPPORTED\t; inverse DCT with SSE/SSE2\n");
+#else
+  printf("%%undef JIDCT_FLT_SSE_SSE2_SUPPORTED\t; inverse DCT with SSE/SSE2\n");
+#endif
+  printf("\n");
+}
+
+/*
+ * Print the "macros from jpeglib.h" section to stdout.
+ *
+ * Writes "%define" lines carrying the compile-time values of
+ * JPEG_LIB_VERSION and JPEG_SIMDEXT_VERSION (both formatted with %d) and of
+ * JPEG_SIMDEXT_VER_STR (formatted with %s and wrapped in double quotes).
+ * The lines starting with ';' are explanatory text copied into the output.
+ */
+void
+print_jpeglib_h_macro (void)
+{
+  printf("\n");
+  printf("; ---- macros from jpeglib.h ----------------------------------------------\n");
+  printf("\n");
+
+  printf("; Version ID for the JPEG library.\n");
+  printf("; Might be useful for tests like \"#if JPEG_LIB_VERSION >= 60\".\n");
+  printf("\n");
+  printf("%%define JPEG_LIB_VERSION  %d\n", JPEG_LIB_VERSION);
+  printf("\n");
+  printf("; SIMD Ext: Version ID for the SIMD extension.\n");
+  printf("\n");
+  printf("%%define JPEG_SIMDEXT_VERSION  %d\n", JPEG_SIMDEXT_VERSION);
+  printf("%%define JPEG_SIMDEXT_VER_STR  \"%s\"\n", JPEG_SIMDEXT_VER_STR);
+  printf("\n");
+}
+
+/*
+ * Program entry point: write the whole generated include file to stdout.
+ *
+ * Output order: a banner comment naming jsimdcfg.inc and makecfg.c (with the
+ * build date and time appended when both __DATE__ and __TIME__ are
+ * available), the JSIMDCFG_INCLUDED define, then the structure-offset,
+ * jconfig.h, jmorecfg.h and jpeglib.h sections.  Takes no arguments and
+ * always terminates with exit status 0; the caller is expected to redirect
+ * stdout into the target file.
+ */
+int
+main (void)
+{
+  printf(";\n; jsimdcfg.inc --- generated by makecfg.c");
+#ifdef __DATE__
+#ifdef __TIME__
+  printf(" (%s, %s)", __DATE__, __TIME__);
+#endif
+#endif
+  printf("\n;\n\n");
+  printf("%%define JSIMDCFG_INCLUDED\t; so that jsimdcfg.inc doesn't do it again\n\n");
+
+  print_structure_offset();
+  print_jconfig_h_macro();
+  print_jmorecfg_h_macro();
+  print_jpeglib_h_macro();
+
+  exit(0);
+  return 0;                    /* suppress no-return-value warnings */
+}
index 829191359374f6691601ede53267ef016976ffd5..fb830fc6a14f411c4155bb0829fa37778b2fe852 100644 (file)
@@ -1,4 +1,5 @@
 # Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
 
 # This makefile is suitable for Unix-like systems with ANSI-capable compilers.
 # If you have a non-ANSI compiler, makefile.unix is a better starting point.
@@ -13,6 +14,13 @@ CFLAGS= -O
 # Generally, we recommend defining any configuration symbols in jconfig.h,
 # NOT via -D switches here.
 
+# The executable name of NASM and its options:
+NASM= nasm
+NAFLAGS= $(NASM_OBJFMT) -I./
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -felf -DELF
+
 # Link-time cc options:
 LDFLAGS= 
 
@@ -24,6 +32,10 @@ LDLIBS=
 # to use jmemansi.o or jmemname.o if you have limited swap space.
 SYSDEPMEM= jmemnobs.o
 
+# OS-dependent SIMD instruction support checker
+# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc)
+SYSDEPSIMDCHK= jsimdgcc.o
+
 # miscellaneous OS-dependent stuff
 # linker
 LN= $(CC)
@@ -75,17 +87,23 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
 DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
         $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
 # library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
+COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \
+        $(SYSDEPSIMDCHK)
 # compression library object files
 CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
         jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
+        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \
+        jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \
+        jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \
+        jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o
 # decompression library object files
 DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
         jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
+        jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \
+        jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \
+        jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \
+        jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \
+        jdmermmx.o jdmerss2.o
 # These objectfiles are included in libjpeg.a
 LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
 # object files for sample applications (excluding library files)
@@ -125,7 +143,7 @@ jconfig.h: jconfig.doc
 
 clean:
        $(RM) *.o cjpeg djpeg jpegtran libjpeg.a rdjpgcom wrjpgcom
-       $(RM) core testout*
+       $(RM) jsimdcfg.inc core testout*
 
 test: cjpeg djpeg jpegtran
        $(RM) testout*
@@ -143,10 +161,63 @@ test: cjpeg djpeg jpegtran
        cmp testorig.jpg testoutt.jpg
 
 
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg ./makecfg.c $(LDLIBS)
+       ./makecfg > jsimdcfg.inc
+       $(RM) ./makecfg
+
+.asm.o:
+       $(NASM) $(NAFLAGS) -o $@ $*.asm
+
+jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
 jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
@@ -157,33 +228,33 @@ jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.
 jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
diff --git a/makefile.bc5 b/makefile.bc5
new file mode 100644 (file)
index 0000000..f3f7df6
--- /dev/null
@@ -0,0 +1,320 @@
+# Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
+
+# This makefile is suitable for Borland C++ Compiler 5.5 (win32)
+
+# Read installation instructions before saying "make" !!
+
+!ifndef srcdir
+srcdir = .
+!endif
+.path.c   = $(srcdir)
+.path.h   = $(srcdir)
+.path.asm = $(srcdir)
+.path.inc = $(srcdir);.
+.path.doc = $(srcdir)
+
+# The name of your C compiler:
+CC= bcc32
+
+# You may need to adjust these cc options:
+CFLAGS= -O2 -OS -Oc -d -ff -w-par -w-aus -w-ccc -w-rch -q -I$(srcdir)
+# Generally, we recommend defining any configuration symbols in jconfig.h,
+# NOT via -D switches here.
+
+# The executable name of NASM and its options:
+NASM= nasmw
+NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -fobj -DOBJ32
+
+# Link-time cc options:
+LDFLAGS= -tWC -q
+
+# To link any special libraries, add the necessary -l commands here.
+LDLIBS= noeh32.lib
+
+# Put here the object file name for the correct system-dependent memory
+# manager file. For Win32, we recommend jmemnobs.c (flat memory!)
+# SYSDEPMEMLIB must list the same files with "+" signs for the librarian.
+SYSDEPMEM= jmemnobs.obj
+SYSDEPMEMLIB= +jmemnobs.obj
+
+# OS-dependent SIMD instruction support checker
+# jsimdw32.obj (Win32) / jsimddjg.obj (DJGPP V.2) / jsimdgcc.obj (Unix/gcc)
+SYSDEPSIMDCHK= jsimdw32.obj
+SYSDEPSIMDCHKLIB= +jsimdw32.obj
+
+# End of configurable options.
+
+
+# source files: JPEG library proper
+LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
+        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
+        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
+        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
+        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
+        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
+        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
+        jquant2.c jutils.c jmemmgr.c
+# memmgr back ends: compile only one of these into a working library
+SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
+# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
+APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
+        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
+        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
+SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
+# files included by source files
+INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
+        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
+# documentation, test, and support files
+DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
+        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
+        coderules.doc filelist.doc change.log
+MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
+        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
+        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
+        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
+        makvms.opt
+CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
+        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
+        jconfig.vms
+CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
+OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
+        testimgp.jpg
+DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
+        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
+# library object files common to compression and decompression
+COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM) \
+        jsimdcpu.obj $(SYSDEPSIMDCHK)
+# compression library object files
+CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \
+        jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \
+        jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \
+        jcdctmgr.obj jccolmmx.obj jccolss2.obj jcsammmx.obj jcsamss2.obj \
+        jcqntint.obj jcqntflt.obj jcqntmmx.obj jcqnt3dn.obj jcqnts2i.obj \
+        jcqntsse.obj jcqnts2f.obj jfdctint.obj jfdctfst.obj jfdctflt.obj \
+        jfmmxint.obj jfmmxfst.obj jf3dnflt.obj jfss2int.obj jfss2fst.obj \
+        jfsseflt.obj
+# decompression library object files
+DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \
+        jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \
+        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jdsample.obj \
+        jdcolor.obj jquant1.obj jquant2.obj jdmerge.obj jidctint.obj \
+        jidctfst.obj jidctred.obj jidctflt.obj jimmxint.obj jimmxfst.obj \
+        jimmxred.obj ji3dnflt.obj jiss2int.obj jiss2fst.obj jiss2red.obj \
+        jisseflt.obj jiss2flt.obj jdsammmx.obj jdsamss2.obj jdcolmmx.obj \
+        jdcolss2.obj jdmermmx.obj jdmerss2.obj
+# These object files are included in libjpeg.lib
+LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
+# object files for sample applications (excluding library files)
+COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj \
+        rdswitch.obj cdjpeg.obj
+DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \
+        rdcolmap.obj cdjpeg.obj
+TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
+
+
+all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
+
+libjpeg.lib: $(LIBOBJECTS)
+       - del libjpeg.lib
+       tlib libjpeg.lib /E /C @&&|
++jcapimin.obj +jcapistd.obj +jctrans.obj +jcparam.obj +jdatadst.obj &
++jcinit.obj +jcmaster.obj +jcmarker.obj +jcmainct.obj +jcprepct.obj &
++jccoefct.obj +jccolor.obj +jcsample.obj +jchuff.obj +jcphuff.obj &
++jcdctmgr.obj +jccolmmx.obj +jccolss2.obj +jcsammmx.obj +jcsamss2.obj &
++jcqntint.obj +jcqntflt.obj +jcqntmmx.obj +jcqnt3dn.obj +jcqnts2i.obj &
++jcqntsse.obj +jcqnts2f.obj +jfdctint.obj +jfdctfst.obj +jfdctflt.obj &
++jfmmxint.obj +jfmmxfst.obj +jf3dnflt.obj +jfss2int.obj +jfss2fst.obj &
++jfsseflt.obj +jdapimin.obj +jdapistd.obj +jdtrans.obj +jdatasrc.obj &
++jdmaster.obj +jdinput.obj +jdmarker.obj +jdhuff.obj +jdphuff.obj &
++jdmainct.obj +jdcoefct.obj +jdpostct.obj +jddctmgr.obj +jdsample.obj &
++jdcolor.obj +jquant1.obj +jquant2.obj +jdmerge.obj +jidctint.obj &
++jidctfst.obj +jidctred.obj +jidctflt.obj +jimmxint.obj +jimmxfst.obj &
++jimmxred.obj +ji3dnflt.obj +jiss2int.obj +jiss2fst.obj +jiss2red.obj &
++jisseflt.obj +jiss2flt.obj +jdsammmx.obj +jdsamss2.obj +jdcolmmx.obj &
++jdcolss2.obj +jdmermmx.obj +jdmerss2.obj +jcomapi.obj +jutils.obj &
++jerror.obj +jmemmgr.obj $(SYSDEPMEMLIB) +jsimdcpu.obj $(SYSDEPSIMDCHKLIB)
+|
+
+cjpeg.exe: $(COBJECTS) libjpeg.lib
+       $(CC) $(LDFLAGS) -ecjpeg.exe $(COBJECTS) libjpeg.lib $(LDLIBS)
+
+djpeg.exe: $(DOBJECTS) libjpeg.lib
+       $(CC) $(LDFLAGS) -edjpeg.exe $(DOBJECTS) libjpeg.lib $(LDLIBS)
+
+jpegtran.exe: $(TROBJECTS) libjpeg.lib
+       $(CC) $(LDFLAGS) -ejpegtran.exe $(TROBJECTS) libjpeg.lib $(LDLIBS)
+
+rdjpgcom.exe: rdjpgcom.obj
+       $(CC) $(LDFLAGS) -erdjpgcom.exe rdjpgcom.obj $(LDLIBS)
+
+wrjpgcom.exe: wrjpgcom.obj
+       $(CC) $(LDFLAGS) -ewrjpgcom.exe wrjpgcom.obj $(LDLIBS)
+
+# This "{}" syntax allows Borland Make to "batch" source files.
+# In this way, each run of the compiler can build many modules.
+.c.obj:
+       $(CC) $(CFLAGS) -c{ $<}
+
+jconfig.h: jconfig.doc
+       echo You must prepare a system-dependent jconfig.h file.
+       echo Please read the installation directions in install.doc.
+       exit 1
+
+clean:
+       - del *.obj
+       - del *.tds
+       - del cjpeg.exe
+       - del djpeg.exe
+       - del jpegtran.exe
+       - del rdjpgcom.exe
+       - del wrjpgcom.exe
+       - del jsimdcfg.inc
+       - del libjpeg.lib
+       - del testout*.*
+
+test: cjpeg.exe djpeg.exe jpegtran.exe
+       - del testout*.*
+       djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg
+       djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg
+       cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm
+       djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg
+       cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm
+       jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg
+       fc /b $(srcdir)\testimg.ppm testout.ppm
+       fc /b $(srcdir)\testimg.bmp testout.bmp
+       fc /b $(srcdir)\testimg.jpg testout.jpg
+       fc /b $(srcdir)\testimg.ppm testoutp.ppm
+       fc /b $(srcdir)\testimgp.jpg testoutp.jpg
+       fc /b $(srcdir)\testorig.jpg testoutt.jpg
+
+
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(srcdir)\makecfg.c
+       $(CC) $(LDFLAGS) -emakecfg.exe makecfg.obj $(LDLIBS)
+       .\makecfg.exe > jsimdcfg.inc
+       - del makecfg.tds
+       - del makecfg.obj
+       - del makecfg.exe
+
+.asm.obj:
+       $(NASM) $(NAFLAGS) -o $@ $<
+
+jsimdcpu.obj: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.obj: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.obj: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.obj: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.obj: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.obj: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.obj: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.obj: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.obj: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.obj: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.obj: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.obj: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.obj: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.obj: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.obj: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.obj: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.obj: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.obj: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.obj: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.obj: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.obj: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.obj: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.obj: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.obj: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.obj: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.obj: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.obj: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.obj: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.obj: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.obj: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.obj: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.obj: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.obj: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.obj: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.obj: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.obj: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.obj: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.obj: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.obj: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.obj: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.obj: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.obj: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+jsimdgcc.obj: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
+jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmaster.obj: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
+# jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
+rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h
+wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h
+cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
+rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
index f25e42e3e66d90997777a6671db3b18ab72166fe..c18b7e6a392ce60083d2944af23e5e67d905c0e7 100644 (file)
@@ -1,4 +1,5 @@
 # Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
 
 # makefile.cfg is edited by configure to produce a custom Makefile.
 
@@ -16,8 +17,9 @@ libdir = $(exec_prefix)/lib
 includedir = $(prefix)/include
 binprefix =
 manprefix =
-manext = 1
-mandir = $(prefix)/man/man$(manext)
+manext = .1
+mandir = $(prefix)/man
+man1dir = $(mandir)/man1
 
 # The name of your C compiler:
 CC= @CC@
@@ -29,6 +31,10 @@ CFLAGS= @CFLAGS@ @CPPFLAGS@ @INCLUDEFLAGS@
 # However, any special defines for ansi2knr.c may be included here:
 ANSI2KNRFLAGS= @ANSI2KNRFLAGS@
 
+# The executable name of NASM and its options:
+NASM= @NASM@
+NAFLAGS= @NAFLAGS@ @INCLUDEFLAGS@
+
 # Link-time cc options:
 LDFLAGS= @LDFLAGS@
 
@@ -37,6 +43,7 @@ LDLIBS= @LIBS@
 
 # If using GNU libtool, LIBTOOL references it; if not, LIBTOOL is empty.
 LIBTOOL = @LIBTOOL@
+top_builddir = .
 # $(O) expands to "lo" if using libtool, plain "o" if not.
 # Similarly, $(A) expands to "la" or "a".
 O = @O@
@@ -51,8 +58,12 @@ JPEG_LIB_VERSION = @JPEG_LIB_VERSION@
 # to use jmemansi.o or jmemname.o if you have limited swap space.
 SYSDEPMEM= @MEMORYMGR@
 
+# OS-dependent SIMD instruction support checker
+# jsimdw32.$(O) (Win32) / jsimddjg.$(O) (DJGPP V.2) / jsimdgcc.$(O) (Unix/gcc)
+SYSDEPSIMDCHK= @SIMDCHECKER@
+
 # miscellaneous OS-dependent stuff
-SHELL= /bin/sh
+SHELL= @SHELL@
 # linker
 LN= @LN@
 # file deletion command
@@ -68,6 +79,11 @@ INSTALL= @INSTALL@
 INSTALL_PROGRAM= @INSTALL_PROGRAM@
 INSTALL_LIB= @INSTALL_LIB@
 INSTALL_DATA= @INSTALL_DATA@
+# uninstallation program
+UNINSTALL= @UNINSTALL@
+# Executable suffix. Under Cygwin,
+# 'rm' doesn't know that executables have an .exe suffix.
+EXE = @EXEEXT@
 
 # End of configurable options.
 
@@ -110,19 +126,26 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
 DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
         $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
 # library object files common to compression and decompression
-COMOBJECTS= jcomapi.$(O) jutils.$(O) jerror.$(O) jmemmgr.$(O) $(SYSDEPMEM)
+COMOBJECTS= jcomapi.$(O) jutils.$(O) jerror.$(O) jmemmgr.$(O) $(SYSDEPMEM) \
+        jsimdcpu.$(O) $(SYSDEPSIMDCHK)
 # compression library object files
 CLIBOBJECTS= jcapimin.$(O) jcapistd.$(O) jctrans.$(O) jcparam.$(O) \
         jdatadst.$(O) jcinit.$(O) jcmaster.$(O) jcmarker.$(O) jcmainct.$(O) \
         jcprepct.$(O) jccoefct.$(O) jccolor.$(O) jcsample.$(O) jchuff.$(O) \
-        jcphuff.$(O) jcdctmgr.$(O) jfdctfst.$(O) jfdctflt.$(O) \
-        jfdctint.$(O)
+        jcphuff.$(O) jcdctmgr.$(O) jccolmmx.$(O) jccolss2.$(O) jcsammmx.$(O) \
+        jcsamss2.$(O) jcqntint.$(O) jcqntflt.$(O) jcqntmmx.$(O) jcqnt3dn.$(O) \
+        jcqnts2i.$(O) jcqntsse.$(O) jcqnts2f.$(O) jfdctint.$(O) jfdctfst.$(O) \
+        jfdctflt.$(O) jfmmxint.$(O) jfmmxfst.$(O) jf3dnflt.$(O) jfss2int.$(O) \
+        jfss2fst.$(O) jfsseflt.$(O)
 # decompression library object files
 DLIBOBJECTS= jdapimin.$(O) jdapistd.$(O) jdtrans.$(O) jdatasrc.$(O) \
         jdmaster.$(O) jdinput.$(O) jdmarker.$(O) jdhuff.$(O) jdphuff.$(O) \
-        jdmainct.$(O) jdcoefct.$(O) jdpostct.$(O) jddctmgr.$(O) \
-        jidctfst.$(O) jidctflt.$(O) jidctint.$(O) jidctred.$(O) \
-        jdsample.$(O) jdcolor.$(O) jquant1.$(O) jquant2.$(O) jdmerge.$(O)
+        jdmainct.$(O) jdcoefct.$(O) jdpostct.$(O) jddctmgr.$(O) jdsample.$(O) \
+        jdcolor.$(O) jquant1.$(O) jquant2.$(O) jdmerge.$(O) jidctint.$(O) \
+        jidctfst.$(O) jidctred.$(O) jidctflt.$(O) jimmxint.$(O) jimmxfst.$(O) \
+        jimmxred.$(O) ji3dnflt.$(O) jiss2int.$(O) jiss2fst.$(O) jiss2red.$(O) \
+        jisseflt.$(O) jiss2flt.$(O) jdsammmx.$(O) jdsamss2.$(O) jdcolmmx.$(O) \
+        jdcolss2.$(O) jdmermmx.$(O) jdmerss2.$(O)
 # These objectfiles are included in libjpeg.a
 LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
 # object files for sample applications (excluding library files)
@@ -136,12 +159,19 @@ TROBJECTS= jpegtran.$(O) rdswitch.$(O) cdjpeg.$(O) transupp.$(O)
 all: @A2K_DEPS@ libjpeg.$(A) cjpeg djpeg jpegtran rdjpgcom wrjpgcom
 
 # Special compilation rules to support ansi2knr and libtool.
-.SUFFIXES: .lo .la
+.SUFFIXES: .lo .la .asm
+
+.asm.o:
+       $(SHELL) $(srcdir)/nasm_lt.sh $(NASM) $(NAFLAGS) $(srcdir)/$*.asm
 
 # How to compile with libtool.
 @COM_LT@.c.lo:
 @COM_LT@       $(LIBTOOL) --mode=compile $(CC) $(CFLAGS) -c $(srcdir)/$*.c
 
+@COM_LT@.asm.lo:
+@COM_LT@       $(LIBTOOL) --mode=compile @TAGCC@ $(SHELL) $(srcdir)/nasm_lt.sh \
+@COM_LT@               $(NASM) $(NAFLAGS) $(srcdir)/$*.asm
+
 # How to use ansi2knr, when not using libtool.
 @COM_A2K@.c.o:
 @COM_A2K@      ./ansi2knr $(srcdir)/$*.c knr/$*.c
@@ -169,7 +199,7 @@ libjpeg.a: @A2K_DEPS@ $(LIBOBJECTS)
 # with libtool:
 libjpeg.la: @A2K_DEPS@ $(LIBOBJECTS)
        $(LIBTOOL) --mode=link $(CC) -o libjpeg.la $(LIBOBJECTS) \
-               -rpath $(libdir) -version-info $(JPEG_LIB_VERSION)
+               -no-undefined -rpath $(libdir) -version-info $(JPEG_LIB_VERSION)
 
 # sample programs:
 
@@ -191,34 +221,62 @@ wrjpgcom: wrjpgcom.$(O)
 # Installation rules:
 
 install: cjpeg djpeg jpegtran rdjpgcom wrjpgcom @FORCE_INSTALL_LIB@
+       -@if [ ! -d $(bindir) ]; then mkdir -p $(bindir); fi
+       -@if [ ! -d $(man1dir) ]; then mkdir -p $(man1dir); fi
        $(INSTALL_PROGRAM) cjpeg $(bindir)/$(binprefix)cjpeg
        $(INSTALL_PROGRAM) djpeg $(bindir)/$(binprefix)djpeg
        $(INSTALL_PROGRAM) jpegtran $(bindir)/$(binprefix)jpegtran
        $(INSTALL_PROGRAM) rdjpgcom $(bindir)/$(binprefix)rdjpgcom
        $(INSTALL_PROGRAM) wrjpgcom $(bindir)/$(binprefix)wrjpgcom
-       $(INSTALL_DATA) $(srcdir)/cjpeg.1 $(mandir)/$(manprefix)cjpeg.$(manext)
-       $(INSTALL_DATA) $(srcdir)/djpeg.1 $(mandir)/$(manprefix)djpeg.$(manext)
-       $(INSTALL_DATA) $(srcdir)/jpegtran.1 $(mandir)/$(manprefix)jpegtran.$(manext)
-       $(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(mandir)/$(manprefix)rdjpgcom.$(manext)
-       $(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(mandir)/$(manprefix)wrjpgcom.$(manext)
+       $(INSTALL_DATA) $(srcdir)/cjpeg.1 $(man1dir)/$(manprefix)cjpeg$(manext)
+       $(INSTALL_DATA) $(srcdir)/djpeg.1 $(man1dir)/$(manprefix)djpeg$(manext)
+       $(INSTALL_DATA) $(srcdir)/jpegtran.1 $(man1dir)/$(manprefix)jpegtran$(manext)
+       $(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(man1dir)/$(manprefix)rdjpgcom$(manext)
+       $(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(man1dir)/$(manprefix)wrjpgcom$(manext)
 
 install-lib: libjpeg.$(A) install-headers
+       -@if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi
        $(INSTALL_LIB) libjpeg.$(A) $(libdir)/$(binprefix)libjpeg.$(A)
 
 install-headers: jconfig.h
+       -@if [ ! -d $(includedir) ]; then mkdir -p $(includedir); fi
        $(INSTALL_DATA) jconfig.h $(includedir)/jconfig.h
        $(INSTALL_DATA) $(srcdir)/jpeglib.h $(includedir)/jpeglib.h
        $(INSTALL_DATA) $(srcdir)/jmorecfg.h $(includedir)/jmorecfg.h
        $(INSTALL_DATA) $(srcdir)/jerror.h $(includedir)/jerror.h
 
+# Uninstallation rules:
+
+uninstall: @UNINSTALL_LIB@
+       $(UNINSTALL) $(bindir)/$(binprefix)cjpeg$(EXE)
+       $(UNINSTALL) $(bindir)/$(binprefix)djpeg$(EXE)
+       $(UNINSTALL) $(bindir)/$(binprefix)jpegtran$(EXE)
+       $(UNINSTALL) $(bindir)/$(binprefix)rdjpgcom$(EXE)
+       $(UNINSTALL) $(bindir)/$(binprefix)wrjpgcom$(EXE)
+       $(UNINSTALL) $(man1dir)/$(manprefix)cjpeg$(manext)
+       $(UNINSTALL) $(man1dir)/$(manprefix)djpeg$(manext)
+       $(UNINSTALL) $(man1dir)/$(manprefix)jpegtran$(manext)
+       $(UNINSTALL) $(man1dir)/$(manprefix)rdjpgcom$(manext)
+       $(UNINSTALL) $(man1dir)/$(manprefix)wrjpgcom$(manext)
+
+uninstall-lib: uninstall-headers
+       $(UNINSTALL) $(libdir)/$(binprefix)libjpeg.$(A)
+
+uninstall-headers:
+       $(UNINSTALL) $(includedir)/jconfig.h
+       $(UNINSTALL) $(includedir)/jpeglib.h
+       $(UNINSTALL) $(includedir)/jmorecfg.h
+       $(UNINSTALL) $(includedir)/jerror.h
+
 clean:
-       $(RM) *.o *.lo libjpeg.a libjpeg.la
-       $(RM) cjpeg djpeg jpegtran rdjpgcom wrjpgcom
-       $(RM) ansi2knr core testout* config.log config.status
+       $(RM) jsimdcfg.inc *.o *.lo libjpeg.a libjpeg.la
+#       Under Cygwin, libtool creates wrapper scripts without a suffix.
+       $(RM) cjpeg djpeg jpegtran cjpeg$(EXE) djpeg$(EXE) jpegtran$(EXE)
+       $(RM) rdjpgcom$(EXE) wrjpgcom$(EXE) ansi2knr$(EXE) core testout*
        $(RM) -r knr .libs _libs
 
 distclean: clean
-       $(RM) Makefile jconfig.h libtool config.cache
+       $(RM) Makefile jconfig.h libtool config.cache config.status config.log
 
 test: cjpeg djpeg jpegtran
        $(RM) testout*
@@ -248,10 +306,60 @@ jconfig.h: jconfig.doc
 .PHONY: all install install-lib install-headers clean distclean test check
 
 
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg $(srcdir)/makecfg.c $(LDLIBS)
+       ./makecfg > jsimdcfg.inc
+       $(RM) makecfg$(EXE)
+
+jsimdcpu.$(O): jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.$(O): jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.$(O): jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.$(O): jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.$(O): jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.$(O): jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.$(O): jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.$(O): jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.$(O): jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.$(O): jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.$(O): jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.$(O): jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.$(O): jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.$(O): jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.$(O): jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.$(O): jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.$(O): jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.$(O): jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.$(O): jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.$(O): jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.$(O): jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.$(O): jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.$(O): jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.$(O): jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.$(O): jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.$(O): jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.$(O): jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.$(O): jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.$(O): jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.$(O): jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.$(O): jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.$(O): jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.$(O): jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.$(O): jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.$(O): jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.$(O): jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.$(O): ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.$(O): jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.$(O): jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.$(O): jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.$(O): jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.$(O): jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+jsimdgcc.$(O): jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
 jcapimin.$(O): jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcapistd.$(O): jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jccoefct.$(O): jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.$(O): jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.$(O): jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jcdctmgr.$(O): jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jchuff.$(O): jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcinit.$(O): jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
@@ -262,33 +370,33 @@ jcomapi.$(O): jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerr
 jcparam.$(O): jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcphuff.$(O): jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcprepct.$(O): jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.$(O): jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.$(O): jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jctrans.$(O): jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapimin.$(O): jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapistd.$(O): jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdatadst.$(O): jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdatasrc.$(O): jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdcoefct.$(O): jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.$(O): jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.$(O): jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jddctmgr.$(O): jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jdhuff.$(O): jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdinput.$(O): jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmainct.$(O): jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmarker.$(O): jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmaster.$(O): jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.$(O): jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.$(O): jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdphuff.$(O): jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdpostct.$(O): jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.$(O): jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.$(O): jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdtrans.$(O): jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jerror.$(O): jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.$(O): jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.$(O): jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.$(O): jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.$(O): jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.$(O): jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.$(O): jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.$(O): jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctflt.$(O): jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctfst.$(O): jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctint.$(O): jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctflt.$(O): jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctfst.$(O): jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctint.$(O): jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctred.$(O): jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jquant1.$(O): jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jquant2.$(O): jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jutils.$(O): jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
index f766d25e38a7d76ea0007a0243a4340be5b78140..21864684b70f642274d8c5089d6af7e15af47e8f 100644 (file)
@@ -1,18 +1,34 @@
 # Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
 
 # This makefile is for DJGPP (Delorie's GNU C port on MS-DOS), v2.0 or later.
 # Thanks to Frank J. Donahoe for this version.
 
 # Read installation instructions before saying "make" !!
 
+srcdir = .
+VPATH  = $(srcdir)
+
 # The name of your C compiler:
 CC= gcc
 
 # You may need to adjust these cc options:
-CFLAGS= -O2 -Wall -I.
+# For gcc 3.4.x
+CFLAGS= -O2 -mtune=pentium2 -march=i386 -fomit-frame-pointer -fweb \
+        -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir)
+# For gcc 3.3.x
+#CFLAGS= -O2 -mcpu=pentium2 -march=i386 -fomit-frame-pointer \
+#        -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir)
 # Generally, we recommend defining any configuration symbols in jconfig.h,
 # NOT via -D switches here.
 
+# The executable name of NASM and its options:
+NASM= nasm
+NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -fcoff -DDJGPP
+
 # Link-time cc options:
 LDFLAGS= -s
 
@@ -24,6 +40,10 @@ LDLIBS=
 # use jmemname.o if you want to use named temp files instead of swap space.
 SYSDEPMEM= jmemnobs.o
 
+# OS-dependent SIMD instruction support checker
+# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc)
+SYSDEPSIMDCHK= jsimddjg.o
+
 # miscellaneous OS-dependent stuff
 # linker
 LN= $(CC)
@@ -75,17 +95,23 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
 DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
         $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
 # library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
+COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \
+        $(SYSDEPSIMDCHK)
 # compression library object files
 CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
         jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
+        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \
+        jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \
+        jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \
+        jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o
 # decompression library object files
 DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
         jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
+        jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \
+        jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \
+        jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \
+        jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \
+        jdmermmx.o jdmerss2.o
 # These objectfiles are included in libjpeg.a
 LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
 # object files for sample applications (excluding library files)
@@ -130,29 +156,83 @@ clean:
        $(RM) jpegtran.exe
        $(RM) rdjpgcom.exe
        $(RM) wrjpgcom.exe
+       $(RM) jsimdcfg.inc
        $(RM) libjpeg.a
        $(RM) testout*.*
 
 test: cjpeg.exe djpeg.exe jpegtran.exe
        $(RM) testout*.*
-       ./djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-       ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-       ./cjpeg -dct int -outfile testout.jpg  testimg.ppm
-       ./djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-       ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-       ./jpegtran -outfile testoutt.jpg testprog.jpg
-       fc /b testimg.ppm testout.ppm
-       fc /b testimg.bmp testout.bmp
-       fc /b testimg.jpg testout.jpg
-       fc /b testimg.ppm testoutp.ppm
-       fc /b testimgp.jpg testoutp.jpg
-       fc /b testorig.jpg testoutt.jpg
+       ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg
+       ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg
+       ./cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm
+       ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg
+       ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm
+       ./jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg
+       fc /b $(srcdir)\testimg.ppm testout.ppm
+       fc /b $(srcdir)\testimg.bmp testout.bmp
+       fc /b $(srcdir)\testimg.jpg testout.jpg
+       fc /b $(srcdir)\testimg.ppm testoutp.ppm
+       fc /b $(srcdir)\testimgp.jpg testoutp.jpg
+       fc /b $(srcdir)\testorig.jpg testoutt.jpg
+
+
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg.exe $(srcdir)/makecfg.c $(LDLIBS)
+       .\makecfg.exe > jsimdcfg.inc
+       $(RM) makecfg.exe
+
+%.o : %.asm
+       $(NASM) $(NAFLAGS) -o $@ $<
+
+jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
 
+jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 
 jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
@@ -163,33 +243,33 @@ jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.
 jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
diff --git a/makefile.linux b/makefile.linux
new file mode 100644 (file)
index 0000000..54e1d65
--- /dev/null
@@ -0,0 +1,449 @@
+# Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
+
+# This makefile is for Linux ELF with gcc
+
+# Read installation instructions before saying "make" !!
+
+# For compiling with source and object files in different directories.
+srcdir = .
+VPATH  = $(srcdir)
+
+# Where to install the programs and man pages.
+prefix = /usr/local
+exec_prefix = ${prefix}
+bindir = $(exec_prefix)/bin
+libdir = $(exec_prefix)/lib
+includedir = $(prefix)/include
+binprefix =
+manprefix =
+manext = 1
+mandir = $(prefix)/man/man$(manext)
+
+LNNAME = libjpeg.so
+SONAME = libjpeg.so.62
+LIBNAME        = libjpeg.so.62.1.0
+
+# The name of your C compiler:
+CC= gcc
+
+# You may need to adjust these cc options
+# (gcc 3.4 and later: use -mtune=i686 in place of the deprecated -mcpu=i686):
+CFLAGS= -O2 -mcpu=i686 -march=i386 -I$(srcdir)
+# Generally, we recommend defining any configuration symbols in jconfig.h,
+# NOT via -D switches here.
+
+# The executable name of NASM and its options:
+NASM= nasm
+NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -felf -DELF
+
+# Link-time cc options:
+LDFLAGS= 
+
+# To link any special libraries, add the necessary -l commands here.
+LDLIBS= 
+
+# Put here the object file name for the correct system-dependent memory
+# manager file.  For Unix this is usually jmemnobs.o, but you may want
+# to use jmemansi.o or jmemname.o if you have limited swap space.
+SYSDEPMEM= jmemnobs.o
+
+# OS-dependent SIMD instruction support checker
+# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc)
+SYSDEPSIMDCHK= jsimdgcc.o
+
+# miscellaneous OS-dependent stuff
+# linker
+LN= $(CC)
+# file deletion command
+RM= rm -f
+# library (.a) file creation command
+AR= ar rc
+# second step in .a creation (use "touch" if not needed)
+AR2= ranlib
+# installation program
+INSTALL= install -c
+INSTALL_PROGRAM= ${INSTALL} -s
+INSTALL_SHARED = ${INSTALL}
+INSTALL_LIB=  ${INSTALL} -m 644
+INSTALL_DATA= ${INSTALL} -m 644
+
+# End of configurable options.
+
+
+# source files: JPEG library proper
+LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
+        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
+        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
+        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
+        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
+        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
+        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
+        jquant2.c jutils.c jmemmgr.c
+# memmgr back ends: compile only one of these into a working library
+SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
+# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
+APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
+        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
+        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
+SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
+# files included by source files
+INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
+        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
+# documentation, test, and support files
+DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
+        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
+        coderules.doc filelist.doc change.log
+MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
+        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
+        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
+        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
+        makvms.opt
+CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
+        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
+        jconfig.vms
+CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
+OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
+        testimgp.jpg
+DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
+        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
+# library object files common to compression and decompression
+COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \
+        $(SYSDEPSIMDCHK)
+# compression library object files
+CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
+        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
+        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \
+        jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \
+        jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \
+        jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o
+# decompression library object files
+DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
+        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
+        jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \
+        jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \
+        jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \
+        jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \
+        jdmermmx.o jdmerss2.o
+# These object files are included in libjpeg.a
+LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
+# These object files (the .pic.o counterparts of LIBOBJECTS) are included in libjpeg.so
+DLLOBJECTS= $(LIBOBJECTS:.o=.pic.o)
+# object files for sample applications (excluding library files)
+COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
+        cdjpeg.o
+DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
+        cdjpeg.o
+TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
+
+
+# Top-level convenience targets.  They name actions rather than files, so
+# they are declared phony: a stray file called e.g. "test" or "install" in
+# the build directory must not stop the corresponding rule from running.
+.PHONY: all app app-static shared static clean test test-static \
+        install install-app install-app-static install-man install-lib \
+        install-headers uninstall uninstall-app uninstall-app-static \
+        uninstall-man uninstall-lib uninstall-headers
+all: static shared app
+app: cjpeg djpeg jpegtran rdjpgcom wrjpgcom
+app-static: cjpeg-static djpeg-static jpegtran-static
+shared: $(LIBNAME)
+static: libjpeg.a
+
+# Static library: any existing archive is removed first, then the archive is
+# recreated from the non-PIC objects and post-processed with $(AR2).
+libjpeg.a: $(LIBOBJECTS)
+       $(RM) libjpeg.a
+       $(AR) libjpeg.a  $(LIBOBJECTS)
+       $(AR2) libjpeg.a
+
+# Shared library: linked from the .pic.o objects, with $(SONAME) recorded as
+# its soname.
+$(LIBNAME): $(DLLOBJECTS)
+       $(CC) -shared -Wl,-soname,$(SONAME) -o $(LIBNAME) $(DLLOBJECTS)
+
+# $(SONAME) is a symlink to the real shared library file.
+$(SONAME): $(LIBNAME)
+       ln -sf $(LIBNAME) $(SONAME)
+
+# $(LNNAME) is likewise a symlink to $(LIBNAME) (not to $(SONAME)); it is the
+# name the *-shared programs are linked against.
+$(LNNAME): $(SONAME)
+       ln -sf $(LIBNAME) $(LNNAME)
+
+cjpeg-static: $(COBJECTS) libjpeg.a
+       $(LN) $(LDFLAGS) -o cjpeg-static $(COBJECTS) libjpeg.a $(LDLIBS)
+
+djpeg-static: $(DOBJECTS) libjpeg.a
+       $(LN) $(LDFLAGS) -o djpeg-static $(DOBJECTS) libjpeg.a $(LDLIBS)
+
+jpegtran-static: $(TROBJECTS) libjpeg.a
+       $(LN) $(LDFLAGS) -o jpegtran-static $(TROBJECTS) libjpeg.a $(LDLIBS)
+
+cjpeg-shared: $(COBJECTS) $(LNNAME)
+       $(LN) $(LDFLAGS) -o cjpeg-shared $(COBJECTS) $(LNNAME) $(LDLIBS)
+
+djpeg-shared: $(DOBJECTS) $(LNNAME)
+       $(LN) $(LDFLAGS) -o djpeg-shared $(DOBJECTS) $(LNNAME) $(LDLIBS)
+
+jpegtran-shared: $(TROBJECTS) $(LNNAME)
+       $(LN) $(LDFLAGS) -o jpegtran-shared $(TROBJECTS) $(LNNAME) $(LDLIBS)
+
+rdjpgcom: rdjpgcom.o
+       $(LN) $(LDFLAGS) -o rdjpgcom rdjpgcom.o $(LDLIBS)
+
+wrjpgcom: wrjpgcom.o
+       $(LN) $(LDFLAGS) -o wrjpgcom wrjpgcom.o $(LDLIBS)
+
+# cjpeg, djpeg and jpegtran are generated /bin/sh wrapper scripts, not
+# binaries.  Each script prepends the build directory (the output of `pwd`
+# at the time the rule runs, so the path is fixed at build time) to
+# LD_LIBRARY_PATH and then execs the matching *-shared program from that
+# directory, forwarding its arguments ("$@").
+cjpeg: cjpeg-shared
+       echo '#!/bin/sh'                                       > cjpeg
+       echo export LD_LIBRARY_PATH=`pwd`:'$$LD_LIBRARY_PATH' >> cjpeg
+       echo exec `pwd`/cjpeg-shared '"$$@"'                  >> cjpeg
+       chmod +x cjpeg
+
+djpeg: djpeg-shared
+       echo '#!/bin/sh'                                       > djpeg
+       echo export LD_LIBRARY_PATH=`pwd`:'$$LD_LIBRARY_PATH' >> djpeg
+       echo exec `pwd`/djpeg-shared '"$$@"'                  >> djpeg
+       chmod +x djpeg
+
+jpegtran: jpegtran-shared
+       echo '#!/bin/sh'                                       > jpegtran
+       echo export LD_LIBRARY_PATH=`pwd`:'$$LD_LIBRARY_PATH' >> jpegtran
+       echo exec `pwd`/jpegtran-shared '"$$@"'               >> jpegtran
+       chmod +x jpegtran
+
+jconfig.h: jconfig.doc
+       echo You must prepare a system-dependent jconfig.h file.
+       echo Please read the installation directions in install.doc.
+       exit 1
+
+clean:
+       $(RM) *.o libjpeg.a $(LIBNAME) $(SONAME) $(LNNAME)
+       $(RM) cjpeg djpeg jpegtran rdjpgcom wrjpgcom
+       $(RM) cjpeg-shared djpeg-shared jpegtran-shared
+       $(RM) cjpeg-static djpeg-static jpegtran-static
+       $(RM) core testout*
+       $(RM) jsimdcfg.inc
+
+test: cjpeg djpeg jpegtran
+       $(RM) testout*
+       ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)/testorig.jpg
+       ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)/testorig.jpg
+       ./cjpeg -dct int -outfile testout.jpg $(srcdir)/testimg.ppm
+       ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)/testprog.jpg
+       ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)/testimg.ppm
+       ./jpegtran -outfile testoutt.jpg $(srcdir)/testprog.jpg
+       cmp $(srcdir)/testimg.ppm testout.ppm
+       cmp $(srcdir)/testimg.bmp testout.bmp
+       cmp $(srcdir)/testimg.jpg testout.jpg
+       cmp $(srcdir)/testimg.ppm testoutp.ppm
+       cmp $(srcdir)/testimgp.jpg testoutp.jpg
+       cmp $(srcdir)/testorig.jpg testoutt.jpg
+
+test-static: cjpeg-static djpeg-static jpegtran-static
+       $(RM) testout*
+       ./djpeg-static -dct int -ppm -outfile testout.ppm $(srcdir)/testorig.jpg
+       ./djpeg-static -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)/testorig.jpg
+       ./cjpeg-static -dct int -outfile testout.jpg $(srcdir)/testimg.ppm
+       ./djpeg-static -dct int -ppm -outfile testoutp.ppm $(srcdir)/testprog.jpg
+       ./cjpeg-static -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)/testimg.ppm
+       ./jpegtran-static -outfile testoutt.jpg $(srcdir)/testprog.jpg
+       cmp $(srcdir)/testimg.ppm testout.ppm
+       cmp $(srcdir)/testimg.bmp testout.bmp
+       cmp $(srcdir)/testimg.jpg testout.jpg
+       cmp $(srcdir)/testimg.ppm testoutp.ppm
+       cmp $(srcdir)/testimgp.jpg testoutp.jpg
+       cmp $(srcdir)/testorig.jpg testoutt.jpg
+
+
+install: install-lib install-app install-man
+
+install-app-static: cjpeg-static djpeg-static jpegtran-static
+       -@if [ ! -d $(bindir) ]; then mkdir -p $(bindir); fi
+       $(INSTALL_PROGRAM) cjpeg-static    $(bindir)/$(binprefix)cjpeg-static
+       $(INSTALL_PROGRAM) djpeg-static    $(bindir)/$(binprefix)djpeg-static
+       $(INSTALL_PROGRAM) jpegtran-static $(bindir)/$(binprefix)jpegtran-static
+
+install-app: install-lib cjpeg-shared djpeg-shared jpegtran-shared rdjpgcom wrjpgcom
+       -@if [ ! -d $(bindir) ]; then mkdir -p $(bindir); fi
+       $(INSTALL_PROGRAM) cjpeg-shared    $(bindir)/$(binprefix)cjpeg
+       $(INSTALL_PROGRAM) djpeg-shared    $(bindir)/$(binprefix)djpeg
+       $(INSTALL_PROGRAM) jpegtran-shared $(bindir)/$(binprefix)jpegtran
+       $(INSTALL_PROGRAM) rdjpgcom        $(bindir)/$(binprefix)rdjpgcom
+       $(INSTALL_PROGRAM) wrjpgcom        $(bindir)/$(binprefix)wrjpgcom
+
+install-man: cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1
+       -@if [ ! -d $(mandir) ]; then mkdir -p $(mandir); fi
+       $(INSTALL_DATA) $(srcdir)/cjpeg.1    $(mandir)/$(manprefix)cjpeg.$(manext)
+       $(INSTALL_DATA) $(srcdir)/djpeg.1    $(mandir)/$(manprefix)djpeg.$(manext)
+       $(INSTALL_DATA) $(srcdir)/jpegtran.1 $(mandir)/$(manprefix)jpegtran.$(manext)
+       $(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(mandir)/$(manprefix)rdjpgcom.$(manext)
+       $(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(mandir)/$(manprefix)wrjpgcom.$(manext)
+
+install-lib: install-headers libjpeg.a $(LIBNAME)
+       -@if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi
+       $(INSTALL_LIB)    libjpeg.a  $(libdir)/libjpeg.a
+       $(INSTALL_SHARED) $(LIBNAME) $(libdir)/$(LIBNAME)
+       (cd $(libdir); ln -sf $(LIBNAME) $(SONAME); ln -sf $(LIBNAME) $(LNNAME))
+
+install-headers: jconfig.h jpeglib.h jmorecfg.h jerror.h
+       -@if [ ! -d $(includedir) ]; then mkdir -p $(includedir); fi
+       $(INSTALL_DATA) $(srcdir)/jconfig.h  $(includedir)/jconfig.h
+       $(INSTALL_DATA) $(srcdir)/jpeglib.h  $(includedir)/jpeglib.h
+       $(INSTALL_DATA) $(srcdir)/jmorecfg.h $(includedir)/jmorecfg.h
+       $(INSTALL_DATA) $(srcdir)/jerror.h   $(includedir)/jerror.h
+
+uninstall: uninstall-lib uninstall-app uninstall-man
+
+uninstall-app-static:
+       $(RM) $(bindir)/$(binprefix)cjpeg-static
+       $(RM) $(bindir)/$(binprefix)djpeg-static
+       $(RM) $(bindir)/$(binprefix)jpegtran-static
+
+uninstall-app: uninstall-lib
+       $(RM) $(bindir)/$(binprefix)cjpeg
+       $(RM) $(bindir)/$(binprefix)djpeg
+       $(RM) $(bindir)/$(binprefix)jpegtran
+       $(RM) $(bindir)/$(binprefix)rdjpgcom
+       $(RM) $(bindir)/$(binprefix)wrjpgcom
+
+uninstall-man:
+       $(RM) $(mandir)/$(manprefix)cjpeg.$(manext)
+       $(RM) $(mandir)/$(manprefix)djpeg.$(manext)
+       $(RM) $(mandir)/$(manprefix)jpegtran.$(manext)
+       $(RM) $(mandir)/$(manprefix)rdjpgcom.$(manext)
+       $(RM) $(mandir)/$(manprefix)wrjpgcom.$(manext)
+
+uninstall-lib: uninstall-headers
+       $(RM) $(libdir)/libjpeg.a
+       $(RM) $(libdir)/$(LIBNAME)
+       $(RM) $(libdir)/$(SONAME)
+       $(RM) $(libdir)/$(LNNAME)
+
+uninstall-headers:
+       $(RM) $(includedir)/jconfig.h
+       $(RM) $(includedir)/jpeglib.h
+       $(RM) $(includedir)/jmorecfg.h
+       $(RM) $(includedir)/jerror.h
+
+
+# jsimdcfg.inc is generated: makecfg.c is compiled into a throw-away helper
+# program whose standard output becomes the include file.
+# The shell redirection creates jsimdcfg.inc before makecfg runs, so if
+# makecfg fails the partial file is removed and the recipe fails; otherwise
+# a truncated jsimdcfg.inc would be left behind and a later "make" would
+# treat it as up to date.
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg $(srcdir)/makecfg.c $(LDLIBS)
+       ./makecfg > jsimdcfg.inc || { $(RM) jsimdcfg.inc; exit 1; }
+       $(RM) ./makecfg
+
+# Build rules for the NASM sources and for the position-independent
+# (.pic.o) objects that make up $(DLLOBJECTS).
+.SUFFIXES: .c .asm .o .pic.o
+
+# PIC object from a C source: same flags as a normal compile plus -fPIC.
+%.pic.o : %.c
+       $(CC) $(CFLAGS) -fPIC -c -o $@ $<
+
+# PIC object from an assembly source: assembled with the PIC symbol defined.
+%.pic.o : %.asm
+       $(NASM) $(NAFLAGS) -DPIC -o $@ $<
+
+# Ordinary (non-PIC) object from an assembly source.
+%.o : %.asm
+       $(NASM) $(NAFLAGS) -o $@ $<
+
+# Prerequisites of the NASM-assembled objects (both the .o and the .pic.o
+# variant of each).  Every entry depends on jsimdcfg.inc and jsimdext.inc;
+# some additionally depend on jcolsamp.inc or jdct.inc.  No recipes are given
+# here: the objects are built by the "%.o : %.asm" / "%.pic.o : %.asm"
+# pattern rules.
+jsimdcpu.o jsimdcpu.pic.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.o jsimdw32.pic.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.o jsimddjg.pic.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.o jccolmmx.pic.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.o jccolss2.pic.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.o jcsammmx.pic.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.o jcsamss2.pic.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.o jdcolmmx.pic.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.o jdcolss2.pic.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.o jdmermmx.pic.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.o jdmerss2.pic.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.o jdsammmx.pic.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.o jdsamss2.pic.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.o jcqntint.pic.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.o jcqntflt.pic.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.o jcqntmmx.pic.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.o jcqnt3dn.pic.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.o jcqnts2i.pic.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.o jcqntsse.pic.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.o jcqnts2f.pic.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.o jfdctint.pic.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.o jfdctfst.pic.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.o jfdctflt.pic.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.o jfmmxint.pic.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.o jfmmxfst.pic.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.o jf3dnflt.pic.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.o jfss2int.pic.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.o jfss2fst.pic.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.o jfsseflt.pic.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.o jidctint.pic.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.o jidctfst.pic.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.o jidctred.pic.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.o jidctflt.pic.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.o jimmxint.pic.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.o jimmxfst.pic.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.o jimmxred.pic.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.o ji3dnflt.pic.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.o jiss2int.pic.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.o jiss2fst.pic.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.o jiss2red.pic.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.o jisseflt.pic.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.o jiss2flt.pic.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+# Header prerequisites of the C-compiled library objects (both the .o and
+# the .pic.o variant of each).  No recipes are given here; the .pic.o objects
+# use the "%.pic.o : %.c" pattern rule.
+# NOTE(review): no "%.o : %.c" rule is visible in this part of the file, so
+# the plain objects presumably rely on make's built-in C rule - confirm.
+jsimdgcc.o jsimdgcc.pic.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
+jcapimin.o jcapimin.pic.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcapistd.o jcapistd.pic.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccoefct.o jccoefct.pic.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.o jccolor.pic.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jcdctmgr.o jcdctmgr.pic.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jchuff.o jchuff.pic.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcinit.o jcinit.pic.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmainct.o jcmainct.pic.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmarker.o jcmarker.pic.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmaster.o jcmaster.pic.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcomapi.o jcomapi.pic.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcparam.o jcparam.pic.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcphuff.o jcphuff.pic.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcprepct.o jcprepct.pic.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.o jcsample.pic.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jctrans.o jctrans.pic.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapimin.o jdapimin.pic.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapistd.o jdapistd.pic.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdatadst.o jdatadst.pic.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdatasrc.o jdatasrc.pic.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdcoefct.o jdcoefct.pic.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.o jdcolor.pic.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jddctmgr.o jddctmgr.pic.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jdhuff.o jdhuff.pic.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdinput.o jdinput.pic.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmainct.o jdmainct.pic.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmarker.o jdmarker.pic.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmaster.o jdmaster.pic.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.o jdmerge.pic.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdphuff.o jdphuff.pic.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdpostct.o jdpostct.pic.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.o jdsample.pic.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdtrans.o jdtrans.pic.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jerror.o jerror.pic.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
+# The seven C DCT entries below are disabled: objects with the same names
+# are declared earlier in this makefile with .asm sources instead.
+# jfdctflt.o jfdctflt.pic.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctfst.o jfdctfst.pic.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctint.o jfdctint.pic.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctflt.o jidctflt.pic.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctfst.o jidctfst.pic.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctint.o jidctint.pic.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctred.o jidctred.pic.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jquant1.o jquant1.pic.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jquant2.o jquant2.pic.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jutils.o jutils.pic.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jmemmgr.o jmemmgr.pic.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemansi.o jmemansi.pic.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemname.o jmemname.pic.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemnobs.o jmemnobs.pic.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemdos.o jmemdos.pic.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemmac.o jmemmac.pic.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+# Header prerequisites of the application objects (cjpeg, djpeg, jpegtran,
+# rdjpgcom, wrjpgcom and their helper modules).  Only plain .o objects are
+# listed for these; there are no .pic.o variants.
+cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
+rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
+wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
+cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
+rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.mgw b/makefile.mgw
new file mode 100644 (file)
index 0000000..06f09e0
--- /dev/null
@@ -0,0 +1,298 @@
+# Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
+
+# This makefile is for MinGW.
+
+# Read installation instructions before saying "make" !!
+
+# Directory holding the sources.  VPATH lets make look there for files that
+# are not found in the current directory.
+srcdir = .
+VPATH  = $(srcdir)
+
+# The name of your C compiler:
+CC= gcc
+
+# You may need to adjust these cc options:
+# For gcc 3.4.x
+CFLAGS= -O2 -mtune=pentium2 -march=i386 -fomit-frame-pointer -fweb \
+        -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir)
+# For gcc 3.3.x
+#CFLAGS= -O2 -mcpu=pentium2 -march=i386 -fomit-frame-pointer \
+#        -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir)
+# Generally, we recommend defining any configuration symbols in jconfig.h,
+# NOT via -D switches here.
+
+# The executable name of NASM and its options.
+# NAFLAGS is the object-format setting below plus an include path pointing
+# at $(srcdir)/.
+NASM= nasmw
+NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/
+# Object file format specifier for NASM;
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -fwin32 -DWIN32
+
+# Link-time cc options:
+LDFLAGS= -s
+
+# To link any special libraries, add the necessary -l commands here.
+LDLIBS= 
+
+# Put here the object file name for the correct system-dependent memory
+# manager file.
+SYSDEPMEM= jmemnobs.o
+
+# OS-dependent SIMD instruction support checker
+# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc)
+SYSDEPSIMDCHK= jsimdw32.o
+
+# miscellaneous OS-dependent stuff
+# linker (the C compiler driver is used for linking)
+LN= $(CC)
+# file deletion command
+# NOTE(review): "del" is a cmd.exe built-in, so the recipes presumably expect
+# the Windows command interpreter rather than a POSIX shell - confirm.
+RM= del
+# library (.a) file creation command
+AR= ar rc
+# second step in .a creation (use "touch" if not needed)
+AR2= ranlib
+
+# End of configurable options.
+
+
+# source files: JPEG library proper
+# NOTE(review): this list still names jfdctflt.c, jfdctfst.c, jfdctint.c,
+# jidctflt.c, jidctfst.c, jidctint.c and jidctred.c, while the dependency
+# section of this makefile declares the same-named objects with .asm
+# sources.  Within this makefile the list only feeds SOURCES/DISTFILES.
+LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
+        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
+        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
+        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
+        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
+        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
+        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
+        jquant2.c jutils.c jmemmgr.c
+# memmgr back ends: compile only one of these into a working library
+SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
+# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
+APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
+        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
+        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
+SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
+# files included by source files
+# NOTE(review): jcolsamp.h appears in the dependency lists of this makefile
+# but is not named here.
+INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
+        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
+# documentation, test, and support files
+DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
+        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
+        coderules.doc filelist.doc change.log
+MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
+        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
+        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
+        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
+        makvms.opt
+CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
+        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
+        jconfig.vms
+CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
+OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
+        testimgp.jpg
+DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
+        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
+# library object files common to compression and decompression
+# (includes the selected memory manager and SIMD support checker objects)
+COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \
+        $(SYSDEPSIMDCHK)
+# compression library object files
+CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
+        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
+        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \
+        jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \
+        jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \
+        jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o
+# decompression library object files
+DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
+        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
+        jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \
+        jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \
+        jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \
+        jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \
+        jdmermmx.o jdmerss2.o
+# These object files are included in libjpeg.a
+LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
+# object files for sample applications (excluding library files)
+COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
+        cdjpeg.o
+DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
+        cdjpeg.o
+TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
+
+
+# Default goal: the static library plus the five command-line programs.
+all: libjpeg.a \
+        cjpeg.exe djpeg.exe jpegtran.exe \
+        rdjpgcom.exe wrjpgcom.exe
+
+# Rebuild the archive from scratch: drop any previous copy (errors ignored),
+# archive all library objects, then run the second archive step on it.
+libjpeg.a: $(LIBOBJECTS)
+       -$(RM) $@
+       $(AR) $@ $(LIBOBJECTS)
+       $(AR2) $@
+
+# Link rules for the sample programs.  cjpeg, djpeg and jpegtran link their
+# own objects against libjpeg.a; rdjpgcom and wrjpgcom are built from a
+# single object each and do not link the library.
+cjpeg.exe: $(COBJECTS) libjpeg.a
+       $(LN) $(LDFLAGS) -o $@ $(COBJECTS) libjpeg.a $(LDLIBS)
+
+djpeg.exe: $(DOBJECTS) libjpeg.a
+       $(LN) $(LDFLAGS) -o $@ $(DOBJECTS) libjpeg.a $(LDLIBS)
+
+jpegtran.exe: $(TROBJECTS) libjpeg.a
+       $(LN) $(LDFLAGS) -o $@ $(TROBJECTS) libjpeg.a $(LDLIBS)
+
+rdjpgcom.exe: rdjpgcom.o
+       $(LN) $(LDFLAGS) -o $@ rdjpgcom.o $(LDLIBS)
+
+wrjpgcom.exe: wrjpgcom.o
+       $(LN) $(LDFLAGS) -o $@ wrjpgcom.o $(LDLIBS)
+
+# jconfig.h is not generated automatically.  When it is missing or older
+# than jconfig.doc, this rule only prints instructions and then fails the
+# build with "exit 1".
+jconfig.h: jconfig.doc
+       echo You must prepare a system-dependent jconfig.h file.
+       echo Please read the installation directions in install.doc.
+       exit 1
+
+# Delete the build products: objects, the five programs, the generated
+# jsimdcfg.inc, the library and any test output files.
+# The leading "-" makes make carry on when a deletion command fails.
+clean:
+       -$(RM) *.o
+       -$(RM) cjpeg.exe
+       -$(RM) djpeg.exe
+       -$(RM) jpegtran.exe
+       -$(RM) rdjpgcom.exe
+       -$(RM) wrjpgcom.exe
+       -$(RM) jsimdcfg.inc
+       -$(RM) libjpeg.a
+       -$(RM) testout*.*
+
+# Self-test: after deleting old testout* files, run djpeg, cjpeg and jpegtran
+# (all with "-dct int" except jpegtran) on the sample images in $(srcdir),
+# then compare every output with its reference file using "fc /b".
+# NOTE(review): the tools are invoked as ./name while file arguments use
+# backslashes; whether both forms work presumably depends on how make
+# dispatches commands on Windows - confirm.
+test: cjpeg.exe djpeg.exe jpegtran.exe
+       -$(RM) testout*.*
+       ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg
+       ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg
+       ./cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm
+       ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg
+       ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm
+       ./jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg
+       fc /b $(srcdir)\testimg.ppm testout.ppm
+       fc /b $(srcdir)\testimg.bmp testout.bmp
+       fc /b $(srcdir)\testimg.jpg testout.jpg
+       fc /b $(srcdir)\testimg.ppm testoutp.ppm
+       fc /b $(srcdir)\testimgp.jpg testoutp.jpg
+       fc /b $(srcdir)\testorig.jpg testoutt.jpg
+
+
+# Generate jsimdcfg.inc, the include file every .asm object below depends on:
+# compile makecfg.c into makecfg.exe, redirect the helper's output into
+# jsimdcfg.inc, then delete the helper.
+# NOTE(review): the redirection presumably creates jsimdcfg.inc even when
+# makecfg.exe fails, so a later run could treat a truncated file as up to
+# date - confirm and consider deleting the file on failure.
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg.exe $(srcdir)/makecfg.c $(LDLIBS)
+       .\makecfg.exe > jsimdcfg.inc
+       $(RM) makecfg.exe
+
+# NASM source -> object file, assembled with $(NAFLAGS).
+%.o : %.asm
+       $(NASM) $(NAFLAGS) -o $@ $<
+
+# Prerequisites of the NASM-assembled objects.  Every entry depends on
+# jsimdcfg.inc and jsimdext.inc; some additionally depend on jcolsamp.inc or
+# jdct.inc.  No recipes are given here: the objects are built by the
+# "%.o : %.asm" pattern rule.
+jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+# Header prerequisites of the C-compiled library objects.  No recipes are
+# given here.
+# NOTE(review): no "%.o : %.c" rule is visible in this makefile, so these
+# objects presumably rely on make's built-in C rule - confirm.
+jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
+jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
+# The seven C DCT entries below are disabled: objects with the same names
+# are declared earlier in this makefile with .asm sources instead.
+# jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+# Header prerequisites of the application objects (cjpeg, djpeg, jpegtran,
+# rdjpgcom, wrjpgcom and their helper modules).
+cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
+rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
+wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
+cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
+rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.mgwdll b/makefile.mgwdll
new file mode 100644 (file)
index 0000000..08a3e69
--- /dev/null
@@ -0,0 +1,310 @@
+# Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
+
+# This makefile is for MinGW.
+# It builds the IJG library as a dynamically linkable library (.DLL),
+# and builds the sample applications which are linked against the DLL.
+
+# Read installation instructions before saying "make" !!
+
+srcdir = .
+VPATH  = $(srcdir)
+
+# The name of your C compiler:
+CC= gcc
+
+# You may need to adjust these cc options:
+# For gcc 3.4.x
+CFLAGS= -O2 -mtune=pentium2 -march=i386 -fomit-frame-pointer -fweb \
+        -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir)
+# For gcc 3.3.x
+#CFLAGS= -O2 -mcpu=pentium2 -march=i386 -fomit-frame-pointer \
+#        -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir)
+# Generally, we recommend defining any configuration symbols in jconfig.h,
+# NOT via -D switches here.
+
+# The executable name of NASM and its options:
+NASM= nasmw
+NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -fwin32 -DWIN32
+
+# Link-time cc options:
+LDFLAGS= -s
+LDFLAGS_DLL= $(LDFLAGS) -shared
+
+# To link any special libraries, add the necessary -l commands here.
+LDLIBS= 
+
+# DLL to build
+DLLNAME = jpeg62.dll
+# import library
+LIBNAME = libjpeg.dll.a
+
+# Put here the object file name for the correct system-dependent memory
+# manager file.
+SYSDEPMEM= jmemnobs.o
+
+# OS-dependent SIMD instruction support checker
+# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc)
+SYSDEPSIMDCHK= jsimdw32.o
+
+# miscellaneous OS-dependent stuff
+# linker
+LN= $(CC)
+# file deletion command
+RM= del
+# library (.a) file creation command (not invoked by any rule in this makefile)
+AR= ar rc
+# second step in .a creation (use "touch" if not needed; not invoked by any rule in this makefile)
+AR2= ranlib
+
+# End of configurable options.
+
+
+# source files: JPEG library proper
+LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
+        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
+        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
+        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
+        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
+        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
+        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
+        jquant2.c jutils.c jmemmgr.c
+# memmgr back ends: compile only one of these into a working library
+SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
+# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
+APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
+        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
+        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
+SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
+# files included by source files
+INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
+        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
+# documentation, test, and support files
+DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
+        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
+        coderules.doc filelist.doc change.log
+MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
+        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
+        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
+        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
+        makvms.opt
+CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
+        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
+        jconfig.vms
+CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
+OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
+        testimgp.jpg
+DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
+        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
+# library object files common to compression and decompression
+COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \
+        $(SYSDEPSIMDCHK)
+# compression library object files
+CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
+        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
+        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \
+        jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \
+        jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \
+        jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o
+# decompression library object files
+DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
+        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
+        jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \
+        jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \
+        jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \
+        jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \
+        jdmermmx.o jdmerss2.o
+# These object files are linked into $(DLLNAME)
+LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
+# object files for sample applications (excluding library files)
+COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
+        cdjpeg.o
+DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
+        cdjpeg.o
+TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
+
+
+# Default target: the DLL plus the five sample programs.
+all: $(DLLNAME) cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
+
+# The import library is written by the DLL link step (-Wl,--out-implib),
+# so it has no recipe of its own and simply depends on the DLL.
+$(LIBNAME): $(DLLNAME)
+# Link the DLL.  $^ is used instead of repeating the prerequisite names so
+# that files located through VPATH (e.g. jpegdll.def when srcdir is not ".")
+# are passed to the linker under the path make actually found them at.
+$(DLLNAME): $(LIBOBJECTS) jpegdll.o jpegdll.def
+       $(LN) $(LDFLAGS_DLL) -o $(DLLNAME) -Wl,--out-implib,$(LIBNAME) $^
+
+# Compile the resource script that is linked into the DLL into a COFF
+# object.  $< (rather than $*.rc) names the prerequisite as make found it,
+# so this also works when jpegdll.rc is located through VPATH in $(srcdir).
+jpegdll.o: jpegdll.rc
+       windres -O coff -o $@ $<
+
+# Sample programs.  cjpeg, djpeg and jpegtran are linked against the DLL's
+# import library; rdjpgcom and wrjpgcom are linked from their own object only.
+cjpeg.exe: $(COBJECTS) $(LIBNAME)
+       $(LN) $(LDFLAGS) -o $@ $(COBJECTS) $(LIBNAME) $(LDLIBS)
+
+djpeg.exe: $(DOBJECTS) $(LIBNAME)
+       $(LN) $(LDFLAGS) -o $@ $(DOBJECTS) $(LIBNAME) $(LDLIBS)
+
+jpegtran.exe: $(TROBJECTS) $(LIBNAME)
+       $(LN) $(LDFLAGS) -o $@ $(TROBJECTS) $(LIBNAME) $(LDLIBS)
+
+rdjpgcom.exe: rdjpgcom.o
+       $(LN) $(LDFLAGS) -o $@ rdjpgcom.o $(LDLIBS)
+
+wrjpgcom.exe: wrjpgcom.o
+       $(LN) $(LDFLAGS) -o $@ wrjpgcom.o $(LDLIBS)
+
+# jconfig.h is not generated by this makefile: when it is missing or older
+# than jconfig.doc, print instructions and abort the build with status 1.
+jconfig.h: jconfig.doc
+       echo You must prepare a system-dependent jconfig.h file.
+       echo Please read the installation directions in install.doc.
+       exit 1
+
+# Remove everything the build and the self-test produce.  Every deletion is
+# prefixed with "-" so that a file that does not exist does not stop make.
+# makecfg.exe is normally deleted by the jsimdcfg.inc rule itself, but it is
+# left behind when that rule fails part-way, so it is removed here as well.
+clean:
+       -$(RM) *.o
+       -$(RM) cjpeg.exe
+       -$(RM) djpeg.exe
+       -$(RM) jpegtran.exe
+       -$(RM) rdjpgcom.exe
+       -$(RM) wrjpgcom.exe
+       -$(RM) jsimdcfg.inc
+       -$(RM) makecfg.exe
+       -$(RM) $(DLLNAME)
+       -$(RM) $(LIBNAME)
+       -$(RM) testout*.*
+
+# Self-test: delete stale testout* files (errors ignored), run djpeg, cjpeg
+# and jpegtran on the images in $(srcdir), then compare every output
+# byte-for-byte (fc /b) with the corresponding file in $(srcdir).
+test: cjpeg.exe djpeg.exe jpegtran.exe
+       -$(RM) testout*.*
+       ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg
+       ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg
+       ./cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm
+       ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg
+       ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm
+       ./jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg
+       fc /b $(srcdir)\testimg.ppm testout.ppm
+       fc /b $(srcdir)\testimg.bmp testout.bmp
+       fc /b $(srcdir)\testimg.jpg testout.jpg
+       fc /b $(srcdir)\testimg.ppm testoutp.ppm
+       fc /b $(srcdir)\testimgp.jpg testoutp.jpg
+       fc /b $(srcdir)\testorig.jpg testoutt.jpg
+
+
+# jsimdcfg.inc is a generated file: build the helper program makecfg.exe from
+# makecfg.c, capture its standard output as jsimdcfg.inc, then delete the helper.
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg.exe $(srcdir)/makecfg.c $(LDLIBS)
+       .\makecfg.exe > jsimdcfg.inc
+       $(RM) makecfg.exe
+
+# Pattern rule: assemble any .asm source into the matching .o with NASM,
+# using the object format and include path set in NAFLAGS.
+%.o : %.asm
+       $(NASM) $(NAFLAGS) -o $@ $<
+
+jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
+jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
+# jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
+rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
+wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
+cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
+rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
index 00455ab6a88496de9bb3d3c026de45cfa8d256bb..e05ecc00206312a08a7a19d52e1fd685d37b8fc4 100644 (file)
@@ -1,4 +1,5 @@
 # Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
 
 # This makefile is suitable for Unix-like systems with non-ANSI compilers.
 # If you have an ANSI compiler, makefile.ansi is a better starting point.
@@ -15,6 +16,13 @@ CFLAGS= -O
 # However, any special defines for ansi2knr.c may be included here:
 ANSI2KNRFLAGS= 
 
+# The executable name of NASM and its options:
+NASM= nasm
+NAFLAGS= $(NASM_OBJFMT) -I./
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -faout -DAOUT
+
 # Link-time cc options:
 LDFLAGS= 
 
@@ -26,6 +34,10 @@ LDLIBS=
 # to use jmemansi.o or jmemname.o if you have limited swap space.
 SYSDEPMEM= jmemnobs.o
 
+# OS-dependent SIMD instruction support checker
+# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc)
+SYSDEPSIMDCHK= jsimdgcc.o
+
 # miscellaneous OS-dependent stuff
 # linker
 LN= $(CC)
@@ -79,17 +91,23 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
 DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
         $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
 # library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
+COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \
+        $(SYSDEPSIMDCHK)
 # compression library object files
 CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
         jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
+        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \
+        jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \
+        jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \
+        jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o
 # decompression library object files
 DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
         jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
+        jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \
+        jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \
+        jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \
+        jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \
+        jdmermmx.o jdmerss2.o
 # These objectfiles are included in libjpeg.a
 LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
 # object files for sample applications (excluding library files)
@@ -139,7 +157,7 @@ jconfig.h: jconfig.doc
 
 clean:
        $(RM) *.o cjpeg djpeg jpegtran libjpeg.a rdjpgcom wrjpgcom
-       $(RM) ansi2knr core testout*
+       $(RM) jsimdcfg.inc ansi2knr core testout*
 
 test: cjpeg djpeg jpegtran
        $(RM) testout*
@@ -157,10 +175,63 @@ test: cjpeg djpeg jpegtran
        cmp testorig.jpg testoutt.jpg
 
 
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg ./makecfg.c $(LDLIBS)
+       ./makecfg > jsimdcfg.inc
+       $(RM) ./makecfg
+
+.asm.o:
+       $(NASM) $(NAFLAGS) -o $@ $*.asm
+
+jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
 jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
@@ -171,33 +242,33 @@ jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.
 jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
index 2acf06940276dfbf3c7d75556081250adc2c116f..7cfeda3d7eb706fa3ea8bc9e11f51d5b3e6e86ff 100644 (file)
@@ -1,32 +1,50 @@
 # Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
 
 # This makefile is for Microsoft Visual C++ on Windows NT (and 95?).
 # It builds the IJG library as a statically linkable library (.LIB),
 # and builds the sample applications as console-mode apps.
-# Thanks to Xingong Chang, Raymond Everly and others.
 
 # Read installation instructions before saying "nmake" !!
-# To build an optimized library without debug info, say "nmake nodebug=1".
 
-# Pull in standard variable definitions
-!include <win32.mak>
+# The name of your C compiler:
+CC= cl
+LD= link
 
 # You may want to adjust these compiler options:
-CFLAGS= $(cflags) $(cdebug) $(cvars) -I.
+!ifdef crtdll
+# (DLL version of CRT)
+CFLAGS= -nologo -c -MD -W3 -O2 -GF -Gy -DNDEBUG -I.
+!else
+# (Single threaded static CRT)
+CFLAGS= -nologo -c -ML -W3 -O2 -GF -Gy -DNDEBUG -I.
+!endif
+
 # Generally, we recommend defining any configuration symbols in jconfig.h,
 # NOT via -D switches here.
 
+# The executable name of NASM and its options:
+NASM= nasmw
+NAFLAGS= $(NASM_OBJFMT) -I./
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -fwin32 -DWIN32
+
 # Link-time options:
-LDFLAGS= $(ldebug) $(conlflags)
+LDFLAGS= -nologo -release -subsystem:console,4.0 -opt:nowin98
 
 # To link any special libraries, add the necessary commands here.
-LDLIBS= $(conlibs)
+LDLIBS= 
 
 # Put here the object file name for the correct system-dependent memory
 # manager file.  For NT we suggest jmemnobs.obj, which expects the OS to
 # provide adequate virtual memory.
 SYSDEPMEM= jmemnobs.obj
 
+# OS-dependent SIMD instruction support checker
+# jsimdw32.obj (Win32) / jsimddjg.obj (DJGPP V.2) / jsimdgcc.obj (Unix/gcc)
+SYSDEPSIMDCHK= jsimdw32.obj
+
 # miscellaneous OS-dependent stuff
 # file deletion command
 RM= del
@@ -72,18 +90,26 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
 DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
         $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
 # library object files common to compression and decompression
-COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM)
+COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM) \
+        jsimdcpu.obj $(SYSDEPSIMDCHK)
 # compression library object files
 CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \
         jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \
         jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \
-        jcdctmgr.obj jfdctfst.obj jfdctflt.obj jfdctint.obj
+        jcdctmgr.obj jccolmmx.obj jccolss2.obj jcsammmx.obj jcsamss2.obj \
+        jcqntint.obj jcqntflt.obj jcqntmmx.obj jcqnt3dn.obj jcqnts2i.obj \
+        jcqntsse.obj jcqnts2f.obj jfdctint.obj jfdctfst.obj jfdctflt.obj \
+        jfmmxint.obj jfmmxfst.obj jf3dnflt.obj jfss2int.obj jfss2fst.obj \
+        jfsseflt.obj
 # decompression library object files
 DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \
         jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \
-        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jidctfst.obj \
-        jidctflt.obj jidctint.obj jidctred.obj jdsample.obj jdcolor.obj \
-        jquant1.obj jquant2.obj jdmerge.obj
+        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jdsample.obj \
+        jdcolor.obj jquant1.obj jquant2.obj jdmerge.obj jidctint.obj \
+        jidctfst.obj jidctred.obj jidctflt.obj jimmxint.obj jimmxfst.obj \
+        jimmxred.obj ji3dnflt.obj jiss2int.obj jiss2fst.obj jiss2red.obj \
+        jisseflt.obj jiss2flt.obj jdsammmx.obj jdsamss2.obj jdcolmmx.obj \
+        jdcolss2.obj jdmermmx.obj jdmerss2.obj
 # These objectfiles are included in libjpeg.lib
 LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
 # object files for sample applications (excluding library files)
@@ -94,38 +120,46 @@ DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \
 TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
 
 # Template command for compiling .c to .obj
-.c.obj:
-       $(cc) $(CFLAGS) $*.c
+.c.obj::
+       $(CC) $(CFLAGS) $<
 
 
 all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
 
 libjpeg.lib: $(LIBOBJECTS)
-       $(RM) libjpeg.lib
+       -$(RM) libjpeg.lib
        lib -out:libjpeg.lib  $(LIBOBJECTS)
 
 cjpeg.exe: $(COBJECTS) libjpeg.lib
-       $(link) $(LDFLAGS) -out:cjpeg.exe $(COBJECTS) libjpeg.lib $(LDLIBS)
+       $(LD) $(LDFLAGS) -out:cjpeg.exe $(COBJECTS) libjpeg.lib $(LDLIBS)
 
 djpeg.exe: $(DOBJECTS) libjpeg.lib
-       $(link) $(LDFLAGS) -out:djpeg.exe $(DOBJECTS) libjpeg.lib $(LDLIBS)
+       $(LD) $(LDFLAGS) -out:djpeg.exe $(DOBJECTS) libjpeg.lib $(LDLIBS)
 
 jpegtran.exe: $(TROBJECTS) libjpeg.lib
-       $(link) $(LDFLAGS) -out:jpegtran.exe $(TROBJECTS) libjpeg.lib $(LDLIBS)
+       $(LD) $(LDFLAGS) -out:jpegtran.exe $(TROBJECTS) libjpeg.lib $(LDLIBS)
 
 rdjpgcom.exe: rdjpgcom.obj
-       $(link) $(LDFLAGS) -out:rdjpgcom.exe rdjpgcom.obj $(LDLIBS)
+       $(LD) $(LDFLAGS) -out:rdjpgcom.exe rdjpgcom.obj $(LDLIBS)
 
 wrjpgcom.exe: wrjpgcom.obj
-       $(link) $(LDFLAGS) -out:wrjpgcom.exe wrjpgcom.obj $(LDLIBS)
+       $(LD) $(LDFLAGS) -out:wrjpgcom.exe wrjpgcom.obj $(LDLIBS)
 
 
 clean:
-       $(RM) *.obj *.exe libjpeg.lib
-       $(RM) testout*
+       -$(RM) *.obj
+       -$(RM) cjpeg.exe
+       -$(RM) djpeg.exe
+       -$(RM) jpegtran.exe
+       -$(RM) rdjpgcom.exe
+       -$(RM) wrjpgcom.exe
+       -$(RM) jsimdcfg.inc
+       -$(RM) libjpeg.lib
+       -if exist *.manifest $(RM) *.manifest
+       -if exist testout*   $(RM) testout*
 
 test: cjpeg.exe djpeg.exe jpegtran.exe
-       $(RM) testout*
+       -if exist testout* $(RM) testout*
        .\djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
        .\djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
        .\cjpeg -dct int -outfile testout.jpg  testimg.ppm
@@ -140,10 +174,66 @@ test: cjpeg.exe djpeg.exe jpegtran.exe
        fc /b testorig.jpg testoutt.jpg
 
 
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) makecfg.c
+       $(LD) $(LDFLAGS) -out:makecfg.exe makecfg.obj $(LDLIBS)
+       .\makecfg.exe > jsimdcfg.inc
+       $(RM) makecfg.obj
+       $(RM) makecfg.exe
+       if exist makecfg.exe.manifest $(RM) makecfg.exe.manifest
+
+.asm.obj:
+       $(NASM) $(NAFLAGS) -o $@ $<
+
+jsimdcpu.obj: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.obj: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.obj: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.obj: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.obj: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.obj: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.obj: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.obj: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.obj: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.obj: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.obj: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.obj: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.obj: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.obj: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.obj: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.obj: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.obj: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.obj: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.obj: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.obj: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.obj: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.obj: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.obj: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.obj: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.obj: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.obj: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.obj: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.obj: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.obj: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.obj: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.obj: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.obj: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.obj: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.obj: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.obj: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.obj: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.obj: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.obj: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.obj: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.obj: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.obj: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.obj: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+jsimdgcc.obj: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
 jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
@@ -154,33 +244,33 @@ jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerro
 jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
 jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
 jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
 jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
 jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
 jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
 jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
diff --git a/makefile.vcdll b/makefile.vcdll
new file mode 100644 (file)
index 0000000..cd715eb
--- /dev/null
@@ -0,0 +1,311 @@
+# Makefile for Independent JPEG Group's software
+# Modified for x86 SIMD extension
+
+# This makefile is for Microsoft Visual C++ 6.0.
+# It builds the IJG library as a dynamically linkable library (.DLL),
+# and builds the sample applications which are linked against the DLL.
+
+# Read installation instructions before saying "nmake" !!
+
+# The name of your C compiler:
+CC= cl
+LD= link
+RC= rc
+
+# You may want to adjust these compiler options:
+#  You have to use the DLL version of the C run-time library (-MD) for both
+#  the JPEG DLL and any application linked against the JPEG DLL.
+CFLAGS= -nologo -c -MD -W3 -O2 -GF -Gy -DNDEBUG -I.
+
+# Generally, we recommend defining any configuration symbols in jconfig.h,
+# NOT via -D switches here.
+
+# The executable name of NASM and its options:
+NASM= nasmw
+NAFLAGS= $(NASM_OBJFMT) -I./
+# object file format specifier for NASM
+# see jsimdext.inc for more details.
+NASM_OBJFMT= -fwin32 -DWIN32
+
+# Link-time options:
+LDFLAGS= -nologo -release -subsystem:console,4.0 -opt:nowin98
+LDFLAGS_DLL= -nologo -release -dll -opt:nowin98
+
+# To link any special libraries, add the necessary commands here.
+LDLIBS= 
+
+# DLL to build
+DLLNAME = jpeg62.dll
+# import library
+LIBNAME = jpeg62.lib
+
+# Put here the object file name for the correct system-dependent memory
+# manager file.  For NT we suggest jmemnobs.obj, which expects the OS to
+# provide adequate virtual memory.
+SYSDEPMEM= jmemnobs.obj
+
+# OS-dependent SIMD instruction support checker
+# jsimdw32.obj (Win32) / jsimddjg.obj (DJGPP V.2) / jsimdgcc.obj (Unix/gcc)
+SYSDEPSIMDCHK= jsimdw32.obj
+
+# miscellaneous OS-dependent stuff
+# file deletion command
+RM= del
+
+# End of configurable options.
+
+
+# source files: JPEG library proper
+LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
+        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
+        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
+        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
+        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
+        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
+        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
+        jquant2.c jutils.c jmemmgr.c
+# memmgr back ends: compile only one of these into a working library
+SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
+# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
+APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
+        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
+        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
+SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
+# files included by source files
+INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
+        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
+# documentation, test, and support files
+DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
+        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
+        coderules.doc filelist.doc change.log
+MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
+        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
+        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
+        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
+        makvms.opt
+CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
+        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
+        jconfig.vms
+CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
+OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
+        testimgp.jpg
+DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
+        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
+# library object files common to compression and decompression
+COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM) \
+        jsimdcpu.obj $(SYSDEPSIMDCHK)
+# compression library object files
+CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \
+        jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \
+        jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \
+        jcdctmgr.obj jccolmmx.obj jccolss2.obj jcsammmx.obj jcsamss2.obj \
+        jcqntint.obj jcqntflt.obj jcqntmmx.obj jcqnt3dn.obj jcqnts2i.obj \
+        jcqntsse.obj jcqnts2f.obj jfdctint.obj jfdctfst.obj jfdctflt.obj \
+        jfmmxint.obj jfmmxfst.obj jf3dnflt.obj jfss2int.obj jfss2fst.obj \
+        jfsseflt.obj
+# decompression library object files
+DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \
+        jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \
+        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jdsample.obj \
+        jdcolor.obj jquant1.obj jquant2.obj jdmerge.obj jidctint.obj \
+        jidctfst.obj jidctred.obj jidctflt.obj jimmxint.obj jimmxfst.obj \
+        jimmxred.obj ji3dnflt.obj jiss2int.obj jiss2fst.obj jiss2red.obj \
+        jisseflt.obj jiss2flt.obj jdsammmx.obj jdsamss2.obj jdcolmmx.obj \
+        jdcolss2.obj jdmermmx.obj jdmerss2.obj
+# These object files are linked into $(DLLNAME)
+LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
+# object files for sample applications (excluding library files)
+COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj \
+        rdswitch.obj cdjpeg.obj
+DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \
+        rdcolmap.obj cdjpeg.obj
+TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
+
+# Template command for compiling .c to .obj
+.c.obj::
+       $(CC) $(CFLAGS) $<
+
+
+all: $(DLLNAME) cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
+
+$(LIBNAME): $(DLLNAME)
+$(DLLNAME): $(LIBOBJECTS) jpegdll.res jpegdll.def
+       $(LD) $(LDFLAGS_DLL) -out:$(DLLNAME) -implib:$(LIBNAME) \
+               $(LIBOBJECTS) jpegdll.res -def:jpegdll.def
+
+jpegdll.res: jpegdll.rc
+       $(RC) -fo $@ $*.rc
+
+cjpeg.exe: $(COBJECTS) $(LIBNAME)
+       $(LD) $(LDFLAGS) -out:cjpeg.exe $(COBJECTS) $(LIBNAME) $(LDLIBS)
+
+djpeg.exe: $(DOBJECTS) $(LIBNAME)
+       $(LD) $(LDFLAGS) -out:djpeg.exe $(DOBJECTS) $(LIBNAME) $(LDLIBS)
+
+jpegtran.exe: $(TROBJECTS) $(LIBNAME)
+       $(LD) $(LDFLAGS) -out:jpegtran.exe $(TROBJECTS) $(LIBNAME) $(LDLIBS)
+
+rdjpgcom.exe: rdjpgcom.obj
+       $(LD) $(LDFLAGS) -out:rdjpgcom.exe rdjpgcom.obj $(LDLIBS)
+
+wrjpgcom.exe: wrjpgcom.obj
+       $(LD) $(LDFLAGS) -out:wrjpgcom.exe wrjpgcom.obj $(LDLIBS)
+
+
+clean:
+       -$(RM) *.obj
+       -$(RM) cjpeg.exe
+       -$(RM) djpeg.exe
+       -$(RM) jpegtran.exe
+       -$(RM) rdjpgcom.exe
+       -$(RM) wrjpgcom.exe
+       -$(RM) jsimdcfg.inc
+       -$(RM) jpegdll.res
+       -$(RM) $(DLLNAME)
+       -$(RM) $(DLLNAME:.dll=.exp)
+       -$(RM) $(LIBNAME)
+       -if exist *.manifest $(RM) *.manifest
+       -if exist testout*   $(RM) testout*
+
+test: cjpeg.exe djpeg.exe jpegtran.exe
+       -if exist testout* $(RM) testout*
+       .\djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
+       .\djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
+       .\cjpeg -dct int -outfile testout.jpg  testimg.ppm
+       .\djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
+       .\cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
+       .\jpegtran -outfile testoutt.jpg testprog.jpg
+       fc /b testimg.ppm testout.ppm
+       fc /b testimg.bmp testout.bmp
+       fc /b testimg.jpg testout.jpg
+       fc /b testimg.ppm testoutp.ppm
+       fc /b testimgp.jpg testoutp.jpg
+       fc /b testorig.jpg testoutt.jpg
+
+
+jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+       $(CC) $(CFLAGS) makecfg.c
+       $(LD) $(LDFLAGS) -out:makecfg.exe makecfg.obj $(LDLIBS)
+       .\makecfg.exe > jsimdcfg.inc
+       $(RM) makecfg.obj
+       $(RM) makecfg.exe
+       if exist makecfg.exe.manifest $(RM) makecfg.exe.manifest
+
+.asm.obj:
+       $(NASM) $(NAFLAGS) -o $@ $<
+
+jsimdcpu.obj: jsimdcpu.asm jsimdcfg.inc jsimdext.inc
+jsimdw32.obj: jsimdw32.asm jsimdcfg.inc jsimdext.inc
+jsimddjg.obj: jsimddjg.asm jsimdcfg.inc jsimdext.inc
+jccolmmx.obj: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jccolss2.obj: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsammmx.obj: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcsamss2.obj: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolmmx.obj: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdcolss2.obj: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmermmx.obj: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdmerss2.obj: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsammmx.obj: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jdsamss2.obj: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc
+jcqntint.obj: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntflt.obj: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntmmx.obj: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnt3dn.obj: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2i.obj: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqntsse.obj: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jcqnts2f.obj: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctint.obj: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctfst.obj: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfdctflt.obj: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxint.obj: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfmmxfst.obj: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jf3dnflt.obj: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2int.obj: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfss2fst.obj: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jfsseflt.obj: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctint.obj: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctfst.obj: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctred.obj: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jidctflt.obj: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxint.obj: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxfst.obj: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jimmxred.obj: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc
+ji3dnflt.obj: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2int.obj: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2fst.obj: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2red.obj: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jisseflt.obj: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+jiss2flt.obj: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc
+
+jsimdgcc.obj: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+
+jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcmaster.obj: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
+jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
+jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
+jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h
+jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
+# jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+# jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
+jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
+jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
+cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
+jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
+rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h
+wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h
+cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
+rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
+wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/nasm_lt.sh b/nasm_lt.sh
new file mode 100644 (file)
index 0000000..ef5a591
--- /dev/null
@@ -0,0 +1,77 @@
+#! /bin/sh
+#
+# nasm_lt.sh - translate a compiler-style command line into one NASM accepts.
+#
+# Usage: nasm_lt.sh <assembler> [options] <file>.asm
+#
+# The arguments are copied into a new command line with these adjustments:
+#   -DPIC / -fPIC / -fpic  collapsed into a single -DPIC
+#   -f<fmt>                kept only when <fmt> is a NASM output format
+#   -I<dir> / -I <dir>     rewritten as -I<dir>/ (trailing slash added)
+#   no -o option           "-o <basename of the .asm file>.o" is appended
+# The resulting command is echoed and then exec'ed.
+#
+# NOTE: the command is assembled in a plain string and re-split by the shell,
+# so arguments containing whitespace are not supported.
+
+command=""
+infile=""
+o_opt=no
+pic=no
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -DPIC|-fPIC|-fpic)
+            # Emit -DPIC only once, however many PIC flags were given.
+            if [ "$pic" != "yes" ] ; then
+                command="$command -DPIC"
+                pic=yes
+            fi
+            ;;
+        -f|-fbin|-faout|-faoutb|-fcoff|-felf|-fas86| \
+        -fobj|-fwin32|-frdf|-fieee|-fmacho)
+            # it's a file format specifier for nasm.
+            command="$command $1"
+            ;;
+        -f*)
+            # maybe a code-generation flag for gcc; drop it.
+            ;;
+        -[Ii]*)
+            incdir=`echo "$1" | sed 's/^-[Ii]//'`
+            # "-I dir" form: take the directory from the next argument,
+            # unless that argument is itself an option.
+            if [ "x$incdir" = x ] && [ "x$2" != x ] ; then
+                case "$2" in
+                    -*) ;;
+                    *) incdir="$2"; shift;;
+                esac
+            fi
+            if [ "x$incdir" != x ] ; then
+                # In the case of NASM, the trailing slash is necessary.
+                incdir=`echo "$incdir" | sed 's%/*$%/%'`
+                command="$command -I$incdir"
+            fi
+            ;;
+        -o*)
+            o_opt=yes
+            command="$command $1"
+            ;;
+        *.asm)
+            infile="$1"
+            command="$command $1"
+            ;;
+        *)
+            command="$command $1"
+            ;;
+    esac
+    shift
+done
+if [ "$o_opt" != yes ] && [ "x$infile" != x ] ; then
+    # By default, NASM creates an output file
+    # in the same directory as the input file.
+    # Strip the directory part and the extension from the input file
+    # name so that the object file lands in the current directory.
+    outbase=`echo "$infile" | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`
+    command="$command -o $outbase.o"
+fi
+echo $command
+exec $command
diff --git a/rdbmp.c b/rdbmp.c
index b05fe2ac47cf2753dda7cb89a756984498d118bd..2245847ff72807bb7f64f4d4a91c143dc51d89b5 100644 (file)
--- a/rdbmp.c
+++ b/rdbmp.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified to improve performance.
+ * Last Modified : October 19, 2004
+ * ---------------------------------------------------------------------
+ *
  * This file contains routines to read input images in Microsoft "BMP"
  * format (MS Windows 3.x, OS/2 1.x, and OS/2 2.x flavors).
  * Currently, only 8-bit and 24-bit images are supported, not 1-bit or
@@ -187,11 +194,14 @@ METHODDEF(JDIMENSION)
 preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
 {
   bmp_source_ptr source = (bmp_source_ptr) sinfo;
+#if (BITS_IN_JSAMPLE != 8) || defined(NEED_FAR_POINTERS)
   register FILE *infile = source->pub.input_file;
   register int c;
   register JSAMPROW out_ptr;
+  JDIMENSION col;
+#endif
+  JDIMENSION row;
   JSAMPARRAY image_ptr;
-  JDIMENSION row, col;
   cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress;
 
   /* Read the data into a virtual array in input-file row order. */
@@ -204,6 +214,10 @@ preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
     image_ptr = (*cinfo->mem->access_virt_sarray)
       ((j_common_ptr) cinfo, source->whole_image,
        row, (JDIMENSION) 1, TRUE);
+#if (BITS_IN_JSAMPLE == 8) && !defined(NEED_FAR_POINTERS)
+    if (! ReadOK(source->pub.input_file, image_ptr[0], source->row_width))
+      ERREXIT(cinfo, JERR_INPUT_EOF);
+#else
     out_ptr = image_ptr[0];
     for (col = source->row_width; col > 0; col--) {
       /* inline copy of read_byte() for speed */
@@ -211,6 +225,7 @@ preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
        ERREXIT(cinfo, JERR_INPUT_EOF);
       *out_ptr++ = (JSAMPLE) c;
     }
+#endif
   }
   if (progress != NULL)
     progress->completed_extra_passes++;
diff --git a/rdgif.c b/rdgif.c
index b27c1675d763fde7d81591c3e521c02027690fe0..0da2515cb7ca499c32dc56f3bed4c0b349c9a653 100644 (file)
--- a/rdgif.c
+++ b/rdgif.c
@@ -1,19 +1,39 @@
 /*
  * rdgif.c
  *
- * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Copyright (C) 1991-1996, Thomas G. Lane.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ **************************************************************************
+ * WARNING: You will need an LZW patent license from Unisys in order to   *
+ * use this file legally in any commercial or shareware application.      *
+ **************************************************************************
+ *
  * This file contains routines to read input images in GIF format.
  *
- *****************************************************************************
- * NOTE: to avoid entanglements with Unisys' patent on LZW compression,      *
- * the ability to read GIF files has been removed from the IJG distribution. *
- * Sorry about that.                                                         *
- *****************************************************************************
+ * These routines may need modification for non-Unix environments or
+ * specialized applications.  As they stand, they assume input from
+ * an ordinary stdio stream.  They further assume that reading begins
+ * at the start of the file; input_init may need work if the
+ * user interface has already read some data (e.g., to determine that
+ * the file is indeed GIF format).
+ */
+
+/*
+ * This code is loosely based on giftoppm from the PBMPLUS distribution
+ * of Feb. 1991.  That file contains the following copyright notice:
+ * +-------------------------------------------------------------------+
+ * | Copyright 1990, David Koblas.                                     |
+ * |   Permission to use, copy, modify, and distribute this software   |
+ * |   and its documentation for any purpose and without fee is hereby |
+ * |   granted, provided that the above copyright notice appear in all |
+ * |   copies and that both that copyright notice and this permission  |
+ * |   notice appear in supporting documentation.  This software is    |
+ * |   provided "as is" without express or implied warranty.           |
+ * +-------------------------------------------------------------------+
  *
- * We are required to state that
+ * We are also required to state that
  *    "The Graphics Interchange Format(c) is the Copyright property of
  *    CompuServe Incorporated. GIF(sm) is a Service Mark property of
  *    CompuServe Incorporated."
 
 #ifdef GIF_SUPPORTED
 
+
+#define        MAXCOLORMAPSIZE 256     /* max # of colors in a GIF colormap */
+#define NUMCOLORS      3       /* # of color components per colormap entry */
+#define CM_RED         0       /* color component numbers */
+#define CM_GREEN       1
+#define CM_BLUE                2
+
+#define        MAX_LZW_BITS    12      /* maximum LZW code size */
+#define LZW_TABLE_SIZE (1<<MAX_LZW_BITS) /* # of possible LZW symbols */
+
+/* Macros for extracting header data --- note we assume chars may be signed */
+
+/* Combine two header bytes into an unsigned value: a is the low-order byte,
+ * b is the high-order byte.  Both are masked to 8 bits first.
+ */
+#define LM_to_uint(a,b)                ((((b)&0xFF) << 8) | ((a)&0xFF))
+
+/* Nonzero (but not necessarily 1) if any bit selected by the mask is set */
+#define BitSet(byte, bit)      ((byte) & (bit))
+#define INTERLACE      0x40    /* mask for bit signifying interlaced image */
+#define COLORMAPFLAG   0x80    /* mask for bit signifying colormap presence */
+
+/* True only if exactly len bytes could be read from file into buffer */
+#define        ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len)))
+
+/* LZW decompression tables look like this:
+ *   symbol_head[K] = prefix symbol of any LZW symbol K (0..LZW_TABLE_SIZE-1)
+ *   symbol_tail[K] = suffix byte   of any LZW symbol K (0..LZW_TABLE_SIZE-1)
+ * Note that entries 0..end_code of the above tables are not used,
+ * since those symbols represent raw bytes or special codes.
+ *
+ * The stack represents the not-yet-used expansion of the last LZW symbol.
+ * In the worst case, a symbol could expand to as many bytes as there are
+ * LZW symbols, so we allocate LZW_TABLE_SIZE bytes for the stack.
+ * (This is conservative since that number includes the raw-byte symbols.)
+ *
+ * The tables are allocated from FAR heap space since they would use up
+ * rather a lot of the near data space in a PC.
+ */
+
+
+/* Private version of data source object */
+
+/* All per-file state of the GIF reader lives in this object.  It starts with
+ * the public cjpeg_source_struct so that a pointer to it can be cast to and
+ * from cjpeg_source_ptr.
+ */
+typedef struct {
+  struct cjpeg_source_struct pub; /* public fields */
+
+  j_compress_ptr cinfo;                /* back link saves passing separate parm */
+
+  JSAMPARRAY colormap;         /* GIF colormap (converted to my format) */
+
+  /* State for GetCode and LZWReadByte */
+  /* code_buf[0..1] hold the last two bytes of the previous data block;
+   * the current data block is loaded starting at code_buf[2].
+   */
+  char code_buf[256+4];                /* current input data block */
+  int last_byte;               /* # of bytes in code_buf */
+  int last_bit;                        /* # of bits in code_buf */
+  int cur_bit;                 /* next bit index to read */
+  boolean out_of_blocks;       /* TRUE if hit terminator data block */
+
+  int input_code_size;         /* codesize given in GIF file */
+  int clear_code,end_code;     /* values for Clear and End codes */
+
+  int code_size;               /* current actual code size */
+  int limit_code;              /* 2^code_size */
+  int max_code;                        /* first unused code value */
+  boolean first_time;          /* flags first call to LZWReadByte */
+
+  /* Private state for LZWReadByte */
+  int oldcode;                 /* previous LZW symbol */
+  int firstcode;               /* first byte of oldcode's expansion */
+
+  /* LZW symbol table and expansion stack */
+  UINT16 FAR *symbol_head;     /* => table of prefix symbols */
+  UINT8  FAR *symbol_tail;     /* => table of suffix bytes */
+  UINT8  FAR *symbol_stack;    /* => stack for symbol expansions */
+  UINT8  FAR *sp;              /* stack pointer */
+
+  /* State for interlaced image processing */
+  boolean is_interlaced;       /* TRUE if have interlaced image */
+  jvirt_sarray_ptr interlaced_image; /* full image in interlaced order */
+  JDIMENSION cur_row_number;   /* need to know actual row number */
+  JDIMENSION pass2_offset;     /* # of pixel rows in pass 1 */
+  JDIMENSION pass3_offset;     /* # of pixel rows in passes 1&2 */
+  JDIMENSION pass4_offset;     /* # of pixel rows in passes 1,2,3 */
+} gif_source_struct;
+
+typedef gif_source_struct * gif_source_ptr;
+
+
+/* Forward declarations */
+METHODDEF(JDIMENSION) get_pixel_rows
+       JPP((j_compress_ptr cinfo, cjpeg_source_ptr sinfo));
+METHODDEF(JDIMENSION) load_interlaced_image
+       JPP((j_compress_ptr cinfo, cjpeg_source_ptr sinfo));
+METHODDEF(JDIMENSION) get_interlaced_row
+       JPP((j_compress_ptr cinfo, cjpeg_source_ptr sinfo));
+
+
+LOCAL(int)
+ReadByte (gif_source_ptr sinfo)
+/* Fetch a single byte from the GIF input file. */
+/* Hitting end-of-file is reported through ERREXIT (JERR_INPUT_EOF). */
+{
+  int ch = getc(sinfo->pub.input_file);
+
+  if (ch == EOF)
+    ERREXIT(sinfo->cinfo, JERR_INPUT_EOF);
+  return ch;
+}
+
+
+LOCAL(int)
+GetDataBlock (gif_source_ptr sinfo, char *buf)
+/* Read one GIF data block into buf and return its byte count.
+ * Each block starts with a count byte; a count of zero marks the end of
+ * a data block sequence, in which case buf is left untouched.
+ * A short read is reported through ERREXIT (JERR_INPUT_EOF).
+ */
+{
+  int nbytes = ReadByte(sinfo);
+
+  if (nbytes > 0 && ! ReadOK(sinfo->pub.input_file, buf, nbytes))
+    ERREXIT(sinfo->cinfo, JERR_INPUT_EOF);
+  return nbytes;
+}
+
+
+LOCAL(void)
+SkipDataBlocks (gif_source_ptr sinfo)
+/* Read and discard data blocks up to and including the block terminator */
+{
+  char scratch[256];           /* throwaway destination for block data */
+
+  for (;;) {
+    if (GetDataBlock(sinfo, scratch) <= 0)
+      break;                   /* zero-length block ends the sequence */
+  }
+}
+
+
+LOCAL(void)
+ReInitLZW (gif_source_ptr sinfo)
+/* Put the LZW decoder back into its post-Clear state. */
+/* Used both at startup and whenever a Clear code is processed. */
+{
+  int clear = sinfo->clear_code;
+
+  sinfo->sp = sinfo->symbol_stack;     /* expansion stack is empty again */
+  sinfo->code_size = sinfo->input_code_size + 1;
+  sinfo->limit_code = clear << 1;      /* 2^code_size */
+  sinfo->max_code = clear + 2;         /* first unused code value */
+}
+
+
+LOCAL(void)
+InitLZWCode (gif_source_ptr sinfo)
+/* Initialize for a series of LZWReadByte (and hence GetCode) calls. */
+/* input_code_size must already be set: the Clear and End codes are */
+/* derived from it here. */
+{
+  /* GetCode initialization */
+  sinfo->last_byte = 2;                /* make safe to "recopy last two bytes" */
+  /* GetCode's first buffer reload copies code_buf[last_byte-2] and
+   * code_buf[last_byte-1], i.e. code_buf[0..1], before any block data has
+   * been loaded.  Nothing else in this file sets those two bytes beforehand,
+   * so give them a defined value here.
+   */
+  sinfo->code_buf[0] = 0;
+  sinfo->code_buf[1] = 0;
+  sinfo->last_bit = 0;         /* nothing in the buffer */
+  sinfo->cur_bit = 0;          /* force buffer load on first call */
+  sinfo->out_of_blocks = FALSE;
+
+  /* LZWReadByte initialization: */
+  /* compute special code values (note that these do not change later) */
+  sinfo->clear_code = 1 << sinfo->input_code_size;
+  sinfo->end_code = sinfo->clear_code + 1;
+  sinfo->first_time = TRUE;
+  ReInitLZW(sinfo);
+}
+
+
+LOCAL(int)
+GetCode (gif_source_ptr sinfo)
+/* Fetch the next code_size bits from the GIF data */
+/* We assume code_size is less than 16 */
+/* Returns the code value.  If the data blocks run out before a complete
+ * code is available, JWRN_GIF_NOMOREDATA is issued and end_code is returned.
+ */
+{
+  register INT32 accum;
+  int offs, ret, count;
+
+  /* Loop until the buffer holds at least code_size unread bits */
+  while ( (sinfo->cur_bit + sinfo->code_size) > sinfo->last_bit) {
+    /* Time to reload the buffer */
+    if (sinfo->out_of_blocks) {
+      WARNMS(sinfo->cinfo, JWRN_GIF_NOMOREDATA);
+      return sinfo->end_code;  /* fake something useful */
+    }
+    /* preserve last two bytes of what we have -- assume code_size <= 16 */
+    sinfo->code_buf[0] = sinfo->code_buf[sinfo->last_byte-2];
+    sinfo->code_buf[1] = sinfo->code_buf[sinfo->last_byte-1];
+    /* Load more bytes; set flag if we reach the terminator block */
+    if ((count = GetDataBlock(sinfo, &sinfo->code_buf[2])) == 0) {
+      sinfo->out_of_blocks = TRUE;
+      WARNMS(sinfo->cinfo, JWRN_GIF_NOMOREDATA);
+      return sinfo->end_code;  /* fake something useful */
+    }
+    /* Reset counters */
+    /* The unread bits now sit at the end of the two preserved bytes
+     * (bit positions 0..15), so rebase cur_bit relative to bit 16.
+     */
+    sinfo->cur_bit = (sinfo->cur_bit - sinfo->last_bit) + 16;
+    sinfo->last_byte = 2 + count;
+    sinfo->last_bit = sinfo->last_byte * 8;
+  }
+
+  /* Form up next 24 bits in accum */
+  /* Bytes are combined low-order first: code_buf[offs] supplies bits 0..7 */
+  offs = sinfo->cur_bit >> 3;  /* byte containing cur_bit */
+#ifdef CHAR_IS_UNSIGNED
+  accum = sinfo->code_buf[offs+2];
+  accum <<= 8;
+  accum |= sinfo->code_buf[offs+1];
+  accum <<= 8;
+  accum |= sinfo->code_buf[offs];
+#else
+  /* plain char may be signed here, so mask each byte to 8 bits */
+  accum = sinfo->code_buf[offs+2] & 0xFF;
+  accum <<= 8;
+  accum |= sinfo->code_buf[offs+1] & 0xFF;
+  accum <<= 8;
+  accum |= sinfo->code_buf[offs] & 0xFF;
+#endif
+
+  /* Right-align cur_bit in accum, then mask off desired number of bits */
+  accum >>= (sinfo->cur_bit & 7);
+  ret = ((int) accum) & ((1 << sinfo->code_size) - 1);
+  
+  sinfo->cur_bit += sinfo->code_size;
+  return ret;
+}
+
+
+LOCAL(int)
+LZWReadByte (gif_source_ptr sinfo)
+/* Read an LZW-compressed byte */
+/* Each call returns one decoded value.  When an LZW symbol expands to
+ * several bytes, the walk through the symbol table produces them last byte
+ * first; all but the first are pushed on symbol_stack and handed out by
+ * subsequent calls before any new code is fetched.
+ */
+{
+  register int code;           /* current working code */
+  int incode;                  /* saves actual input code */
+
+  /* First time, just eat the expected Clear code(s) and return next code, */
+  /* which is expected to be a raw byte. */
+  if (sinfo->first_time) {
+    sinfo->first_time = FALSE;
+    code = sinfo->clear_code;  /* enables sharing code with Clear case */
+  } else {
+
+    /* If any codes are stacked from a previously read symbol, return them */
+    if (sinfo->sp > sinfo->symbol_stack)
+      return (int) *(-- sinfo->sp);
+
+    /* Time to read a new symbol */
+    code = GetCode(sinfo);
+
+  }
+
+  if (code == sinfo->clear_code) {
+    /* Reinit state, swallow any extra Clear codes, and */
+    /* return next code, which is expected to be a raw byte. */
+    ReInitLZW(sinfo);
+    do {
+      code = GetCode(sinfo);
+    } while (code == sinfo->clear_code);
+    if (code > sinfo->clear_code) { /* make sure it is a raw byte */
+      WARNMS(sinfo->cinfo, JWRN_GIF_BADDATA);
+      code = 0;                        /* use something valid */
+    }
+    /* make firstcode, oldcode valid! */
+    sinfo->firstcode = sinfo->oldcode = code;
+    return code;
+  }
+
+  if (code == sinfo->end_code) {
+    /* Skip the rest of the image, unless GetCode already read terminator */
+    if (! sinfo->out_of_blocks) {
+      SkipDataBlocks(sinfo);
+      sinfo->out_of_blocks = TRUE;
+    }
+    /* Complain that there's not enough data */
+    WARNMS(sinfo->cinfo, JWRN_GIF_ENDCODE);
+    /* Pad data with 0's */
+    return 0;                  /* fake something usable */
+  }
+
+  /* Got normal raw byte or LZW symbol */
+  incode = code;               /* save for a moment */
+  
+  if (code >= sinfo->max_code) { /* special case for not-yet-defined symbol */
+    /* code == max_code is OK; anything bigger is bad data */
+    if (code > sinfo->max_code) {
+      WARNMS(sinfo->cinfo, JWRN_GIF_BADDATA);
+      incode = 0;              /* prevent creation of loops in symbol table */
+    }
+    /* this symbol will be defined as oldcode/firstcode */
+    *(sinfo->sp++) = (UINT8) sinfo->firstcode;
+    code = sinfo->oldcode;
+  }
+
+  /* If it's a symbol, expand it into the stack */
+  /* Codes below clear_code are raw bytes, so the walk stops there */
+  while (code >= sinfo->clear_code) {
+    *(sinfo->sp++) = sinfo->symbol_tail[code]; /* tail is a byte value */
+    code = sinfo->symbol_head[code]; /* head is another LZW symbol */
+  }
+  /* At this point code just represents a raw byte */
+  sinfo->firstcode = code;     /* save for possible future use */
+
+  /* If there's room in table, */
+  if ((code = sinfo->max_code) < LZW_TABLE_SIZE) {
+    /* Define a new symbol = prev sym + head of this sym's expansion */
+    sinfo->symbol_head[code] = sinfo->oldcode;
+    sinfo->symbol_tail[code] = (UINT8) sinfo->firstcode;
+    sinfo->max_code++;
+    /* Is it time to increase code_size? */
+    if ((sinfo->max_code >= sinfo->limit_code) &&
+       (sinfo->code_size < MAX_LZW_BITS)) {
+      sinfo->code_size++;
+      sinfo->limit_code <<= 1; /* keep equal to 2^code_size */
+    }
+  }
+  
+  sinfo->oldcode = incode;     /* save last input symbol for future use */
+  return sinfo->firstcode;     /* return first byte of symbol's expansion */
+}
+
+
+/* Colormap bytes are 8 bits wide; widen them when JSAMPLE is wider */
+#if BITS_IN_JSAMPLE == 8
+#define UPSCALE(x)  (x)
+#else
+#define UPSCALE(x)  ((x) << (BITS_IN_JSAMPLE-8))
+#endif
+
+LOCAL(void)
+ReadColorMap (gif_source_ptr sinfo, int cmaplen, JSAMPARRAY cmap)
+/* Read cmaplen colormap entries from the file into cmap. */
+/* Each entry is three consecutive bytes: red, green, blue. */
+{
+  int entry = 0;
+
+  while (entry < cmaplen) {
+    cmap[CM_RED][entry]   = (JSAMPLE) UPSCALE(ReadByte(sinfo));
+    cmap[CM_GREEN][entry] = (JSAMPLE) UPSCALE(ReadByte(sinfo));
+    cmap[CM_BLUE][entry]  = (JSAMPLE) UPSCALE(ReadByte(sinfo));
+    entry++;
+  }
+}
+
+
+LOCAL(void)
+DoExtension (gif_source_ptr sinfo)
+/* Handle a GIF extension block. */
+/* No extension is interpreted: the label is traced and the data skipped. */
+{
+  int label = ReadByte(sinfo); /* extension label byte */
+
+  TRACEMS1(sinfo->cinfo, 1, JTRC_GIF_EXTENSION, label);
+  /* Discard the data block(s) that belong to the extension */
+  SkipDataBlocks(sinfo);
+}
+
+
+/*
+ * Read the file header; return image size and component count.
+ *
+ * Parses the GIF signature, the Logical Screen Descriptor and, after
+ * skipping any extension blocks, the first Image Descriptor.  On return
+ * the LZW decoder is initialized, pub.get_pixel_rows and pub.buffer are
+ * set up, and the image description fields of cinfo are filled in.
+ * Malformed or truncated headers are reported through ERREXIT.
+ */
+
+METHODDEF(void)
+start_input_gif (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr) sinfo;
+  char hdrbuf[10];             /* workspace for reading control blocks */
+  unsigned int width, height;  /* image dimensions */
+  int colormaplen, aspectRatio;
+  int c;
+
+  /* Allocate space to store the colormap */
+  source->colormap = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) MAXCOLORMAPSIZE, (JDIMENSION) NUMCOLORS);
+
+  /* Read and verify GIF Header */
+  if (! ReadOK(source->pub.input_file, hdrbuf, 6))
+    ERREXIT(cinfo, JERR_GIF_NOT);
+  if (hdrbuf[0] != 'G' || hdrbuf[1] != 'I' || hdrbuf[2] != 'F')
+    ERREXIT(cinfo, JERR_GIF_NOT);
+  /* Check for expected version numbers.
+   * If unknown version, give warning and try to process anyway;
+   * this is per recommendation in GIF89a standard.
+   */
+  if ((hdrbuf[3] != '8' || hdrbuf[4] != '7' || hdrbuf[5] != 'a') &&
+      (hdrbuf[3] != '8' || hdrbuf[4] != '9' || hdrbuf[5] != 'a'))
+    TRACEMS3(cinfo, 1, JTRC_GIF_BADVERSION, hdrbuf[3], hdrbuf[4], hdrbuf[5]);
+
+  /* Read and decipher Logical Screen Descriptor */
+  if (! ReadOK(source->pub.input_file, hdrbuf, 7))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  /* The screen size is only a provisional value: it is replaced by the
+   * size from the Image Descriptor below.
+   */
+  width = LM_to_uint(hdrbuf[0],hdrbuf[1]);
+  height = LM_to_uint(hdrbuf[2],hdrbuf[3]);
+  colormaplen = 2 << (hdrbuf[4] & 0x07);
+  /* we ignore the color resolution, sort flag, and background color index */
+  aspectRatio = hdrbuf[6] & 0xFF;
+  if (aspectRatio != 0 && aspectRatio != 49)
+    TRACEMS(cinfo, 1, JTRC_GIF_NONSQUARE);
+
+  /* Read global colormap if header indicates it is present */
+  if (BitSet(hdrbuf[4], COLORMAPFLAG))
+    ReadColorMap(source, colormaplen, source->colormap);
+
+  /* Scan until we reach start of desired image.
+   * We don't currently support skipping images, but could add it easily.
+   */
+  for (;;) {
+    c = ReadByte(source);
+
+    if (c == ';')              /* GIF terminator?? */
+      ERREXIT(cinfo, JERR_GIF_IMAGENOTFOUND);
+
+    if (c == '!') {            /* Extension */
+      DoExtension(source);
+      continue;
+    }
+
+    if (c != ',') {            /* Not an image separator? */
+      WARNMS1(cinfo, JWRN_GIF_CHAR, c);
+      continue;
+    }
+
+    /* Read and decipher Local Image Descriptor */
+    if (! ReadOK(source->pub.input_file, hdrbuf, 9))
+      ERREXIT(cinfo, JERR_INPUT_EOF);
+    /* we ignore top/left position info, also sort flag */
+    width = LM_to_uint(hdrbuf[4],hdrbuf[5]);
+    height = LM_to_uint(hdrbuf[6],hdrbuf[7]);
+    /* BitSet yields the raw masked bit (0x40); store a clean 0/1 boolean */
+    source->is_interlaced = (BitSet(hdrbuf[8], INTERLACE) != 0);
+
+    /* Read local colormap if header indicates it is present */
+    /* Note: if we wanted to support skipping images, */
+    /* we'd need to skip rather than read colormap for ignored images */
+    if (BitSet(hdrbuf[8], COLORMAPFLAG)) {
+      colormaplen = 2 << (hdrbuf[8] & 0x07);
+      ReadColorMap(source, colormaplen, source->colormap);
+    }
+
+    source->input_code_size = ReadByte(source); /* get min-code-size byte */
+    /* Raw codes (values below 2^input_code_size) are used directly as
+     * colormap indexes and are stored in UINT8 table entries.  The colormap
+     * has only MAXCOLORMAPSIZE (256) entries, so a code size above 8 would
+     * let decoded pixel values index past the end of it.  Reject such files
+     * rather than accepting everything below MAX_LZW_BITS.
+     */
+    if (source->input_code_size < 2 || source->input_code_size > 8)
+      ERREXIT1(cinfo, JERR_GIF_CODESIZE, source->input_code_size);
+
+    /* Reached desired image, so break out of loop */
+    /* If we wanted to skip this image, */
+    /* we'd call SkipDataBlocks and then continue the loop */
+    break;
+  }
+
+  /* Prepare to read selected image: first initialize LZW decompressor */
+  source->symbol_head = (UINT16 FAR *)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                               LZW_TABLE_SIZE * SIZEOF(UINT16));
+  source->symbol_tail = (UINT8 FAR *)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                               LZW_TABLE_SIZE * SIZEOF(UINT8));
+  source->symbol_stack = (UINT8 FAR *)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                               LZW_TABLE_SIZE * SIZEOF(UINT8));
+  InitLZWCode(source);
+
+  /*
+   * If image is interlaced, we read it into a full-size sample array,
+   * decompressing as we go; then get_interlaced_row selects rows from the
+   * sample array in the proper order.
+   */
+  if (source->is_interlaced) {
+    /* We request the virtual array now, but can't access it until virtual
+     * arrays have been allocated.  Hence, the actual work of reading the
+     * image is postponed until the first call to get_pixel_rows.
+     */
+    source->interlaced_image = (*cinfo->mem->request_virt_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+       (JDIMENSION) width, (JDIMENSION) height, (JDIMENSION) 1);
+    if (cinfo->progress != NULL) {
+      cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress;
+      progress->total_extra_passes++; /* count file input as separate pass */
+    }
+    source->pub.get_pixel_rows = load_interlaced_image;
+  } else {
+    source->pub.get_pixel_rows = get_pixel_rows;
+  }
+
+  /* Create compressor input buffer. */
+  source->pub.buffer = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) width * NUMCOLORS, (JDIMENSION) 1);
+  source->pub.buffer_height = 1;
+
+  /* Return info about the image. */
+  cinfo->in_color_space = JCS_RGB;
+  cinfo->input_components = NUMCOLORS;
+  cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+
+  TRACEMS3(cinfo, 1, JTRC_GIF, width, height, colormaplen);
+}
+
+
+/*
+ * Deliver one row of RGB pixels into pub.buffer[0].
+ * Used for noninterlaced GIF images: each pixel is decoded straight
+ * from the GIF file and expanded through the colormap.
+ * Always returns 1 (one row delivered).
+ */
+
+METHODDEF(JDIMENSION)
+get_pixel_rows (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr) sinfo;
+  JSAMPARRAY cmap = source->colormap;
+  JSAMPROW outp = source->pub.buffer[0];
+  JDIMENSION remaining = cinfo->image_width;
+  int index;                   /* colormap index of the current pixel */
+
+  while (remaining > 0) {
+    index = LZWReadByte(source);
+    outp[0] = cmap[CM_RED][index];
+    outp[1] = cmap[CM_GREEN][index];
+    outp[2] = cmap[CM_BLUE][index];
+    outp += 3;
+    remaining--;
+  }
+  return 1;
+}
+
+
+/*
+ * First get_pixel_rows call for an interlaced GIF file.
+ * Decodes the entire image, in file (interlaced) order, into the
+ * virtual array, then installs get_interlaced_row as the row reader
+ * and uses it to deliver the first row.
+ */
+
+METHODDEF(JDIMENSION)
+load_interlaced_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr) sinfo;
+  cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress;
+  JSAMPARRAY rowbuf;
+  JSAMPROW outp;
+  JDIMENSION row, col;
+
+  /* Fill the virtual array one stored row at a time */
+  for (row = 0; row < cinfo->image_height; row++) {
+    if (progress != NULL) {
+      progress->pub.pass_counter = (long) row;
+      progress->pub.pass_limit = (long) cinfo->image_height;
+      (*progress->pub.progress_monitor) ((j_common_ptr) cinfo);
+    }
+    rowbuf = (*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr) cinfo, source->interlaced_image,
+       row, (JDIMENSION) 1, TRUE);
+    outp = rowbuf[0];
+    for (col = 0; col < cinfo->image_width; col++)
+      outp[col] = (JSAMPLE) LZWReadByte(source);
+  }
+  if (progress != NULL)
+    progress->completed_extra_passes++;
+
+  /* Set up the state get_interlaced_row needs: the current output row and
+   * the stored-row offsets at which passes 2, 3 and 4 begin.
+   */
+  source->cur_row_number = 0;
+  source->pass2_offset = (cinfo->image_height + 7) / 8;
+  source->pass3_offset = source->pass2_offset + (cinfo->image_height + 3) / 8;
+  source->pass4_offset = source->pass3_offset + (cinfo->image_height + 1) / 4;
+
+  /* Later calls go straight to get_interlaced_row; make the first one now */
+  source->pub.get_pixel_rows = get_interlaced_row;
+  return get_interlaced_row(cinfo, sinfo);
+}
+
+
+/*
+ * Deliver one row of RGB pixels into pub.buffer[0].
+ * Used for interlaced GIF images once the whole image has been loaded:
+ * the row is fetched from the virtual array and expanded through the
+ * colormap.  Always returns 1 (one row delivered).
+ */
+
+METHODDEF(JDIMENSION)
+get_interlaced_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr) sinfo;
+  JSAMPARRAY cmap = source->colormap;
+  JDIMENSION rownum = source->cur_row_number;
+  JDIMENSION stored_row;       /* position of this row in the virtual array */
+  JDIMENSION col;
+  JSAMPARRAY rowbuf;
+  JSAMPROW inp, outp;
+  int phase, index;
+
+  /* The low three bits of the output row number select the interlace pass */
+  phase = (int) (rownum & 7);
+  if (phase == 0)                      /* first-pass row */
+    stored_row = rownum >> 3;
+  else if (phase == 4)                 /* second-pass row */
+    stored_row = (rownum >> 3) + source->pass2_offset;
+  else if (phase == 2 || phase == 6)   /* third-pass row */
+    stored_row = (rownum >> 2) + source->pass3_offset;
+  else                                 /* fourth-pass row */
+    stored_row = (rownum >> 1) + source->pass4_offset;
+
+  rowbuf = (*cinfo->mem->access_virt_sarray)
+    ((j_common_ptr) cinfo, source->interlaced_image,
+     stored_row, (JDIMENSION) 1, FALSE);
+
+  /* Expand each stored colormap index into an RGB triple */
+  inp = rowbuf[0];
+  outp = source->pub.buffer[0];
+  for (col = 0; col < cinfo->image_width; col++) {
+    index = GETJSAMPLE(inp[col]);
+    *outp++ = cmap[CM_RED][index];
+    *outp++ = cmap[CM_GREEN][index];
+    *outp++ = cmap[CM_BLUE][index];
+  }
+
+  source->cur_row_number = rownum + 1; /* for next time */
+  return 1;
+}
+
+
+/*
+ * Finish up at the end of the file.
+ * The GIF reader has nothing to do here; the function exists so that
+ * pub.finish_input can be given a valid method pointer.
+ */
+
+METHODDEF(void)
+finish_input_gif (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  /* no work */
+}
+
+
 /*
  * The module selection routine for GIF format input.
  */
 GLOBAL(cjpeg_source_ptr)
 jinit_read_gif (j_compress_ptr cinfo)
 {
-  fprintf(stderr, "GIF input is unsupported for legal reasons.  Sorry.\n");
-  exit(EXIT_FAILURE);
-  return NULL;                 /* keep compiler happy */
+  gif_source_ptr source;
+
+  /* Create module interface object */
+  /* It is taken from the JPOOL_IMAGE pool of the library's memory manager */
+  source = (gif_source_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                 SIZEOF(gif_source_struct));
+  source->cinfo = cinfo;       /* make back link for subroutines */
+  /* Fill in method ptrs, except get_pixel_rows which start_input sets */
+  source->pub.start_input = start_input_gif;
+  source->pub.finish_input = finish_input_gif;
+
+  /* Callers see only the public part of the object */
+  return (cjpeg_source_ptr) source;
 }
 
 #endif /* GIF_SUPPORTED */
diff --git a/simd_README.ja.txt b/simd_README.ja.txt
new file mode 100644 (file)
index 0000000..bc10b63
--- /dev/null
@@ -0,0 +1,145 @@
+Independent JPEG Group's JPEG software release 6b
+  with x86 SIMD extension for IJG JPEG library version 1.02
+    == README ==
+-----------------------------------------------------------
+
+    ** Note **
+The accompanying documents related to x86 SIMD extension are written in
+Japanese. The English version of these documents is currently unavailable.
+I apologize for this inconvenience to international programmers.
+
+Most of the source code of the extension part is written in assembly
+language. To compile the source, you need NASM (netwide assembler).
+NASM is available from http://nasm.sourceforge.net/ or
+http://sourceforge.net/project/showfiles.php?group_id=6208 .
+
+At present, the x86 SIMD extension doesn't support 64-bit mode of
+AMD64 (x86_64).
+
+The x86 SIMD extension is an unofficial extension to the IJG JPEG
+software. Please do not send any questions about this distribution
+to the Independent JPEG Group.
+
+For conditions of distribution and use, see the IJG's README file.
+The same conditions apply to this SIMD-extended JPEG software.
+
+
+
+■このソフトは
+
+  JPEG ¤Î¥µ¥Ý¡¼¥È¥é¥¤¥Ö¥é¥ê¤È¤·¤Æ¹­¤¯»È¤ï¤ì¤Æ¤¤¤ë Independent JPEG Group's
+  JPEG library (libjpeg ¥é¥¤¥Ö¥é¥ê) ¤Ë¡¢Intel x86 ·Ï CPU ¤Î»ý¤Ä SIMD Ì¿Îá¤ò
+  ÍøÍѤ·¤¿¥³¡¼¥É(¥ë¡¼¥Á¥ó)¤ò¿·¤¿¤ËÄɲä·¡¢¹â®²½²þ¤¤·¤¿¤â¤Î¤Ç¤¹¡£
+  MMX ¤ä SSE ¤Ê¤É¤Î SIMD ±é»»µ¡Ç½¤òÁõÈ÷¤·¤Æ¤¤¤ë¥×¥í¥»¥Ã¥µ¾å¤Çưºî¤µ¤»¤ë¤È¡¢
+  ¥ª¥ê¥¸¥Ê¥ëÈǤΠlibjpeg ¥é¥¤¥Ö¥é¥ê¤ÈÈæ³Ó¤·¤Æ 2¡Á3 ÇÜÄøÅ٤ήÅÙ¤ÇÆ°ºî¤·¤Þ¤¹¡£
+  ¤Þ¤¿¡¢SIMD ²½¤Ë°Í¤é¤Ê¤¤¹â®²½²þ¤¤â¤¤¤¯¤Ä¤«»Ü¤µ¤ì¤Æ¤ª¤ê¡¢SIMD ±é»»¤Î»È¤¨
+  ¤Ê¤¤µì·¿CPU¤Ë¤ª¤¤¤Æ¤â¡¢¥ª¥ê¥¸¥Ê¥ëÈǤÈÈæ³Ó¤·¤Æ½½¿ô¡óÄøÅٹ⮤Ëưºî¤·¤Þ¤¹¡£
+
+  JPEG °µ½Ì¡¿Å¸³«½èÍý¤Î¹â®²½¤òÌÜŪ¤È¤·¤Æ¤¤¤Þ¤¹¤¬¡¢Æ°ºî®ÅÙºÇÍ¥Àè¤Ç¤Ï¤Ê¤¯¡¢
+  ¥ª¥ê¥¸¥Ê¥ëÈÇ¤ÈÆ±Åù°Ê¾å¤Î·×»»ÀºÅÙ¤ò»ý¤Ä¤³¤È¤òºÇÍ¥Àè¤Ë¹Í¤¨¤¿¥³¡¼¥É¤òºÎÍÑ
+  ¤·¤Æ¤¤¤Þ¤¹¡£¼ÂºÝ¡¢DCT±é»»¤ËÉâÆ°¾®¿ôÅÀDCT¤ò»È¤Ã¤¿¾ì¹ç¡¢¤ª¤è¤Ó¡¢¤ä¤äÆÃ¼ì¤Ê
+  ¥µ¥ó¥×¥ê¥ó¥°Èæ(h1v2)¤ò»ý¤ÄJPEG¥Õ¥¡¥¤¥ë¤òŸ³«¤¹¤ë¾ì¹ç¤ò½ü¤¤¤Æ¤Ï¡¢
+  ¥ª¥ê¥¸¥Ê¥ëÈǤȣ±¥Ó¥Ã¥È¤â°ã¤ï¤Ê¤¤·ë²Ì¤ò½Ð¤·¤Þ¤¹¡£¾åµ­¤Î£²¤Ä¤ÎÎã³°¤Î¾ì¹ç¤â
+  ¥ª¥ê¥¸¥Ê¥ëÈǤè¤ê¤Ï¹â²è¼Á²½(¹âÀºÅÙ²½)¤µ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  SIMD Âбþ²½¤ËºÝ¤·¤Æ¤Ï¡¢²Äǽ¤Ê¸Â¤ê¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠlibjpeg ¥é¥¤¥Ö¥é¥ê¤È¤Î
+  ¸ß´¹À­¤¬¼º¤ï¤ì¤Ê¤¤¤è¤¦¤Ë¹Í褵¤ì¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢¤Û¤È¤ó¤É¤Î¾ì¹ç¡¢¥ª¥ê¥¸¥Ê¥ë
+  ÈǤò¤½¤Î¤Þ¤ÞÃÖ¤­´¹¤¨¤ë¤³¤È¤¬²Äǽ¤Ç¤¹¡£ÆÃ¤Ë¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤Ë´Ø¤·¤Æ¸À¤¨¤Ð¡¢
+  °ìÉô¤ÎÎã³°(cygwin ¤Î¾ì¹ç)¤ò½ü¤­¡¢¤½¤ì¤Ï¥ª¥ê¥¸¥Ê¥ëÈǤȥХ¤¥Ê¥ê¥ì¥Ù¥ë¤Ç¤Î
+  ¾å°Ì¸ß´¹À­¤¬¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢¤½¤Î¤Þ¤Þ¥ª¥ê¥¸¥Ê¥ëÈǤòÃÖ¤­´¹¤¨¤ë¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£
+
+  SIMD Âбþ²½¤µ¤ì¤Æ¤¤¤ëÉôʬ¤Ï¡¢°Ê²¼¤Î¤È¤ª¤ê¡§
+
+  °µ½Ì½èÍý¡§
+    ¿§¶õ´ÖÊÑ´¹(RGB->YCbCr)  : MMX or SSE2
+    ダウンサンプリング      : MMX or SSE2
+    DCT½çÊÑ´¹(¹âÀºÅÙÀ°¿ô)   : MMX or SSE2
+    DCT½çÊÑ´¹(¹â®À°¿ô)     : MMX or SSE2
+    DCT½çÊÑ´¹(ÉâÆ°¾®¿ô)     : 3DNow! or SSE (À°¿ô±é»»Éô: MMX or SSE2)
+    DCT·¸¿ôÎ̻Ҳ½(À°¿ô)     : MMX or SSE2
+    DCT·¸¿ôÎ̻Ҳ½(ÉâÆ°¾®¿ô) : 3DNow! or SSE (À°¿ô±é»»Éô: MMX or SSE2)
+
+  Å¸³«½èÍý¡§
+    ¿§¶õ´ÖÊÑ´¹(YCbCr->RGB)  : MMX or SSE2
+    アップサンプリング      : MMX or SSE2
+    DCTµÕÊÑ´¹(¹âÀºÅÙÀ°¿ô)   : MMX or SSE2
+    DCTµÕÊÑ´¹(¹â®À°¿ô)     : MMX or SSE2
+    DCTµÕÊÑ´¹(ÉâÆ°¾®¿ô)     : 3DNow! or SSE (À°¿ô±é»»Éô: MMX or SSE2)
+    DCTµÕÊÑ´¹(½Ì¾®Å¸³«)     : MMX or SSE2
+
+  Ãí¡ËSSE2 ¤Ë¤Ä¤¤¤Æ¤Ï¡¢SIMD À°¿ô±é»»¤Î¤ß¤òÍøÍѤ·¤Æ¤¤¤Þ¤¹¡£SIMD ÇÜÀºÅÙ
+      ÉâÆ°¾®¿ôÅÀ±é»»¤ÏÍøÍѤ·¤Æ¤¤¤Þ¤»¤ó¡£¤Þ¤¿¡¢SSE3 ¤Ï»ÈÍѤµ¤ì¤Æ¤¤¤Þ¤»¤ó¡£
+      ¤³¤Î JPEG ¥é¥¤¥Ö¥é¥ê¤Ë¤ª¤¤¤Æ¤Ï¡¢SSE3 ¤ò»ÈÍѤ·¤Æ¤âưºî®ÅÙ¸þ¾å¤Î
+      ¸«¹þ¤ß¤¬¤Û¤È¤ó¤É¤Ê¤¤¤¿¤á¤Ç¡¢SSE3 ¤ò¥µ¥Ý¡¼¥È¤¹¤ëͽÄê¤Ï¤¢¤ê¤Þ¤»¤ó¡£
+
+  ¤³¤Î¤Û¤«¤Ë¡¢¥¢¥»¥ó¥Ö¥ê¸À¸ìÈÇDCT¥ë¡¼¥Á¥ó(ÈóSIMD; ½çÊÑ´¹£³¼ï¡¿µÕÊÑ´¹£´¼ï)
+  ¤Ë¤è¤ê¡¢SIMDÌ¿Îá¤Î»È¤¨¤Ê¤¤µì·¿CPU¤Ç¤â½½¿ô¡óÄøÅ٤ι⮲½¤¬´üÂԤǤ­¤Þ¤¹¡£
+  ¤µ¤é¤Ë¡¢Å¸³«½èÍý¤Ç¤Î¥Ï¥Õ¥Þ¥ó¥Ç¥³¡¼¥É¥ë¡¼¥Á¥ó¤Ï¡¢SIMD ²½¤Ë°Í¤é¤Ê¤¤ÊýË¡¤Ç
+  ¹â®²½²þ¤¤µ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+
+■対応しているプラットフォーム
+
+  Intel x86 CPU ¤Ë¸ÇÍ­¤Îµ¡Ç½¤òÍøÍѤ·¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢¥ª¥ê¥¸¥Ê¥ëÈǤȤϰۤʤꡢ
+  Intel x86 CPU ¤ª¤è¤Ó¤½¤Î¸ß´¹ CPU ¤òºÎÍѤ·¤Æ¤¤¤ë¥·¥¹¥Æ¥à¤Ë¸Â¤é¤ì¤Þ¤¹¡£
+  PowerPC ¤Ê¤É¤Î Intel x86 ·Ï°Ê³°¤Î¥·¥¹¥Æ¥à¤Ë¤ÏÂбþ¤·¤Æ¤¤¤Þ¤»¤ó¡£
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  ¶ñÂÎŪ¤Ë¤Ï¡¢80386 °Ê¹ß¤Î Intel x86 CPU ¤ª¤è¤Ó¤½¤Î¸ß´¹ CPU ¤òºÎÍѤ·¤Æ¤¤¤ë
+  ¥Ï¡¼¥É¥¦¥§¥¢¤Ç¡¢¤«¤Ä¡¢32bit¥Õ¥é¥Ã¥È¥¢¥É¥ì¥¹¥â¡¼¥É(Êݸî¥â¡¼¥É)¤ò»ÈÍѤ·¤Æ
+  ¤¤¤ë¥×¥é¥Ã¥È¥Õ¥©¡¼¥à(OS)¤¬ÂоݤǤ¹¡£¤³¤ì¤Ë¤Ï¡¢Win32 (Windows 9x·Ï/NT·Ï)
+  ¤ä³Æ¼ï PC-UNIX (linux ¤ä xBSD ¥Õ¥¡¥ß¥ê¤Ê¤É) ¤Ê¤É¤¬³ºÅö¤·¤Þ¤¹¡£¤Ê¤ª¡¢
+  AMD64 (EM64T) ¤Î64bit¥â¡¼¥É´Ä¶­¤Ë¤ÏÂбþ¤·¤Æ¤¤¤Þ¤»¤ó¡£¤´Ãí°Õ¤¯¤À¤µ¤¤¡£
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+¢£¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG library ¸ÇÍ­¤ÎÀ©¸Â
+
+  ¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG library ¤Ç¤Ï¡¢¥³¥ó¥Ñ¥¤¥ë»þ¤Î¥ª¥×¥·¥ç¥ó¤Ç¡¢
+  8bitÀºÅÙJPEG ¤È 12bitÀºÅÙJPEG ¤ÎξÊý¤ËÂбþ¤·¤Þ¤¹¤¬¡¢¤³¤Î SIMD ³ÈÄ¥ÈǤÏ
+  8bitÀºÅÙJPEG ¤Î¤ß¤ÎÂбþ¤Ç¡¢12bitÀºÅÙJPEG ¤Ë¤ÏÂбþ¤·¤Þ¤»¤ó¡£¤È¤Ï¤¤¤¨¡¢
+  12bitÀºÅÙJPEG ¤Ï°åÎÅÍÑ¤Ê¤É¤ÎÆÃ¼ìʬÌî¤ò½ü¤¤¤ÆËؤɻȤï¤ì¤Æ¤¤¤Ê¤¤¤Î¤Ç¡¢
+  ÌäÂê¤Ï¾¯¤Ê¤¤¤È»×¤¤¤Þ¤¹¡£
+
+
+■使い方
+
+  ¥Þ¥Ë¥å¥¢¥ë¤Ï¡¢°Ê²¼¤Î¥Õ¥¡¥¤¥ë¤Ëʬ¤«¤ì¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢¼ÂºÝ¤Î»È¤¤Êý¤Ê¤É¤Ë
+  ¤Ä¤¤¤Æ¤Ï¡¢¤½¤Á¤é¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+    simd_README.ja.txt   - このファイル
+    simd_filelist.ja.txt - ¼ýÏ¿¥Õ¥¡¥¤¥ë¤Î¥Õ¥¡¥¤¥ë¥ê¥¹¥È
+    simd_install.ja.txt  - ¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý
+    simd_internal.ja.txt - SIMD ³ÈÄ¥Éôʬ¤Î¾ÜºÙ
+    simd_cdjpeg.ja.txt   - SIMD ÈÇ cjpeg/djpeg ¤Ë¸ÇÍ­¤Îµ¡Ç½¤Î²òÀâ
+    simd_changes.ja.txt  - SIMD ³ÈÄ¥Éôʬ¤Î²þÈÇÍúÎò
+
+
+¢£»ÈÍѾò·ï¡¦¥µ¥Ý¡¼¥È
+
+  ¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG software ¤Î»ÈÍѾò·ï¤Ë¤Ä¤¤¤Æ¤Ï¡¢¥ª¥ê¥¸¥Ê¥ëÈǤÎ
+  IJG JPEG software ¤Î»ÈÍѾò·ï¤¬Å¬ÍѤµ¤ì¤Þ¤¹¡£¾Ü¤·¤¯¤Ï¡¢Æ±º­¤Î README
+  ¥Õ¥¡¥¤¥ë(±Ñʸ)¤Î LEGAL ISSUES ¤Î¹à¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+  ¾åµ­¤Î»ÈÍѾò·ï¤ÎÆâÍÆ¤Î·«¤êÊÖ¤·¤Ë¤Ê¤ê¤Þ¤¹¤¬¡¢¤³¤Î¥½¥Õ¥È¥¦¥§¥¢¤Ï¡Ö¸½¾õ¤Î
+  ¤Þ¤Þ¤Ç¡×Ä󶡤µ¤ì¤Æ¤¤¤ë¤â¤Î¤Ç¡¢¾¦¶ÈŪ¤Ê»ÈÍѲÄǽÀ­¡¢¤ª¤è¤ÓÆÃÄê¤ÎÌÜŪ¤Ë
+  ÂФ¹¤ëŬ¹çÀ­¤Ê¤É¤â´Þ¤á¡¢¤¤¤«¤Ê¤ëÊݾڤ⤢¤ê¤Þ¤»¤ó¡£
+  ¤Þ¤¿¡¢¸¶ºî¼Ô(The Independent JPEG Group)¤â²þ¤¼Ô(MIYASAKA Masaru)¤â¡¢
+  »öͳ¤Î¤¤¤«¤ó¤òÌä¤ï¤º¡¢ËÜ¥½¥Õ¥È¥¦¥§¥¢¤Î»ÈÍѤˤè¤Ã¤ÆÈ¯À¸¤·¤¿Ç¡²¿¤Ê¤ë»³²¤Ë
+  ¤Ä¤¤¤Æ¤â¡¢°ìÀÚ¤½¤ÎÀÕǤ¤òÉé¤ï¤Ê¤¤¤â¤Î¤È¤·¤Þ¤¹¡£
+
+  ¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG software ¤Ï¡¢¥ª¥ê¥¸¥Ê¥ë³«È¯¸µ¤Î IJG ¤È¤Ï´Ø·¸
+  ¤Ê¤¯¡¢ÆÈ¼«¤Ë³ÈÄ¥¤ò¹Ô¤Ê¤Ã¤¿¤â¤Î¤Ç¤¹¡£¤Ç¤¹¤Î¤Ç¡¢¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG
+  software ¤Ë´Ø¤¹¤ë¼ÁÌä¤ò¡¢¥ª¥ê¥¸¥Ê¥ë³«È¯¸µ (The Independent JPEG Group)
+  ¤ËÁ÷¤é¤Ê¤¤¤Ç¤¯¤À¤µ¤¤¡£
+
+  ¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG software ¤Ë´Ø¤·¤Æ¤Ï¡¢¸¶Â§¤È¤·¤Æ¥Î¡¼¥µ¥Ý¡¼¥È¤È
+  ¤µ¤»¤Æ¤¤¤¿¤À¤­¤Þ¤¹¡£¥á¡¼¥ë¤Ê¤É¤Ç¤´¼ÁÌä¤Ê¤É¤ò¤¤¤¿¤À¤­¤Þ¤·¤Æ¤â¡¢¾ï¤Ë²¿¤é¤«
+  ¤ÎÊÖÅú¤¬¤Ç¤­¤ë¤ï¤±¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¤Î¤Ç¡¢¤´¾µÃΤª¤­¤¯¤À¤µ¤¤¡£
+  ÆÃ¤Ë¡¢¡Ê¥ª¥ê¥¸¥Ê¥ë¤Î±Ñʸ¥Þ¥Ë¥å¥¢¥ë¤ò´Þ¤á¡ËƱº­¤Î¥Þ¥Ë¥å¥¢¥ëÎà¤Ë²óÅú¤¬
+  ½ñ¤¤¤Æ¤¢¤ë¼ÁÌä¤ä¡¢»ÈÍѼԤΥ½¥Õ¥È¥¦¥§¥¢µ»½Ñ¼Ô¤È¤·¤Æ¤Îµ»ÎÌÉÔ­¡¦·Ð¸³ÉÔ­¤Ë
+  ´Ø¤ï¤ë¼ÁÌä¡¢¼ÁÌä¤ÎÍ×ÎΤòÆÀ¤Ê¤¤¼ÁÌä¤Ê¤É¤Ë¤Ä¤¤¤Æ¤Ï¡¢²óÅú¤ò¤¤¤¿¤·¤Þ¤»¤ó¤Î¤Ç¡¢
+  ¤¢¤·¤«¤é¤º¤´Î»¾µ¤¯¤À¤µ¤¤¡£
+
+
+
+           E-Mail Address : alkaid@coral.ocn.ne.jp (µÜºä ¸­/MIYASAKA Masaru)
+[EOF]
diff --git a/simd_cdjpeg.ja.txt b/simd_cdjpeg.ja.txt
new file mode 100644 (file)
index 0000000..941a15a
--- /dev/null
@@ -0,0 +1,75 @@
+Independent JPEG Group's JPEG software release 6b
+  with x86 SIMD extension for IJG JPEG library version 1.02
+    == CDJPEG ==
+-----------------------------------------------------------
+
+■このファイルは
+
+  このファイルでは、SIMD 版の cjpeg / djpeg に固有の機能を解説します。
+
+    ■ SIMD 動作モード情報 (-v オプション)
+    ■ 特定の SIMD 演算を使用しないようにする (-noXXX オプション)
+    ■ GIF 形式の読み込み／書き出し (djpeg の -gif オプション)
+
+
+■ SIMD 動作モード情報 (-v オプション)
+
+  この SIMD 版 cjpeg / djpeg では、-v オプションをつけて起動すると、以下
+  のような SIMD 動作モード情報がバージョン情報と共に表示されます。
+
+    Independent JPEG Group's DJPEG, version 6b  27-Mar-1998
+    Copyright (C) 1998, Thomas G. Lane
+
+    x86 SIMD extension for IJG JPEG library, version 1.02
+
+    SIMD instructions supported by the system : MMX 3DNow! SSE SSE2
+
+          === SIMD Operation Modes ===
+    Accurate integer DCT  (-dct int)   : SSE2
+    Fast integer DCT      (-dct fast)  : SSE2
+    Floating-point DCT    (-dct float) : SSE
+    Reduced-size DCT      (-scale M/N) : SSE2
+    High-quality upsampling (default)  : SSE2
+    Low-quality upsampling (-nosmooth) : SSE2
+    Colorspace conversion (YCbCr->RGB) : SSE2
+
+  "SIMD instructions supported by the system" ¤Î¹àÌܤÇÎóµó¤µ¤ì¤ë¤Î¤Ï¡¢
+  ¥·¥¹¥Æ¥à(CPU/OS)¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤Æ¤¤¤ë SIMD ±é»»¤Î¼ïÎà¤Ç¤¹¡£¤Ê¤ª¡¢¤³¤Î
+  ¥½¥Õ¥È¤Ç¤Ï SSE3 ¤Ï»ÈÍѤµ¤ì¤Æ¤¤¤Þ¤»¤ó¤· SSE3 ¤Î¥µ¥Ý¡¼¥È¤Î¸¡½Ð¤â¹Ô¤Ê¤ï¤ì
+  ¤Þ¤»¤ó¤Î¤Ç¡¢SSE3 ¤¬¥µ¥Ý¡¼¥È¤µ¤ì¤Æ¤¤¤Æ¤â¤³¤Î¹àÌܤˤϸ½¤ì¤Þ¤»¤ó¡£
+
+  ¤½¤Î²¼¤Î "SIMD Operation Modes" ¤Ï¡¢³Æ½èÍýÃʳ¬¤Ç»ÈÍѤµ¤ì¤ë SIMD ±é»»¤Î
+  ¼ïÎà¤Ç¤¹¡£Floating-point DCT ¤Ç¤Ï 3DNow! ¤« SSE ¡¢¤½¤ì°Ê³°¤Î¤È¤³¤í¤Ç¤Ï
+  MMX ¤« SSE2 ¤¬ÁªÂò¤µ¤ì¤Þ¤¹¡£°ìÈ̤ˡ¢SSE/SSE2 ¤ÎÊý¤¬ MMX/3DNow! ¤è¤ê¤â
+  ¹âÀ­Ç½¤È¤µ¤ì¤ë¤Î¤Ç¡¢¤³¤ÎÁÐÊý¤¬ÍøÍѲÄǽ¤Ê¾ì¹ç¤Ï SSE/SSE2 ¤¬Í¥ÀèŪ¤ËÁªÂò
+  ¤µ¤ì¤Þ¤¹¡£
+
+
+■ 特定の SIMD 演算を使用しないようにする (-noXXX オプション)
+
+  この SIMD 版 cjpeg / djpeg では、以下のようなオプションを指定することで、
+  特定の SIMD 演算を使用しないようにできます。
+
+    -nommx         MMX を使用しない
+    -no3dnow       3DNow! を使用しない
+    -nosse         SSE を使用しない
+    -nosse2        SSE2 を使用しない
+    -nosimd        すべての SIMD 演算を使用しない
+
+  これらのオプションは、cjpeg/djpeg のコマンドラインの先頭に指定するよう
+  にしてください。
+
+
+¢£ GIF ·Á¼°¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤· (djpeg ¤Î -gif ¥ª¥×¥·¥ç¥ó)
+
+  ¥ª¥ê¥¸¥Ê¥ëÈÇ cjpeg/djpeg ¤Î version 6b ¤Ç¤Ï¡¢ÆÃµö¾å¤ÎÌäÂ꤫¤é¡¢GIF ·Á¼°
+  ²èÁü¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤·¤¬¥µ¥Ý¡¼¥È¤µ¤ì¤Ê¤¯¤Ê¤Ã¤Æ¤¤¤Þ¤·¤¿¡£¤Ç¤¹¤¬¡¢
+  GIF ¤Ë´Ø¤¹¤ëÆÃµö¤¬ 2003¡Á2004 Ç¯¤Ë¤«¤±¤ÆÀ¤³¦Åª¤Ë´ü¸ÂÀÚ¤ì¤Ë¤Ê¤Ã¤¿¤¿¤á¡¢
+  Åö SIMD ÈǤǤϠGIF ·Á¼°¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤·¤òÉü³è¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£
+
+  Í¾Ã̤Ǥ¹¤¬¡¢Åö SIMD ÈǤǠGIF ·Á¼°¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤·¤Ë»ÈÍѤ·¤Æ¤¤¤ë
+  ¥â¥¸¥å¡¼¥ë¤Ï¡¢µìÈǤǤ¢¤ë version 6a ¤Î¤â¤Î¤òήÍѤ·¤Æ¤¤¤Þ¤¹¡£
+
+
+
+[EOF]
diff --git a/simd_changes.ja.txt b/simd_changes.ja.txt
new file mode 100644 (file)
index 0000000..b256002
--- /dev/null
@@ -0,0 +1,24 @@
+Independent JPEG Group's JPEG software release 6b
+  with x86 SIMD extension for IJG JPEG library version 1.02
+    == CHANGES ==
+-----------------------------------------------------------
+
+IJG R6b with x86SIMD V1.02 (2006-02-04)
+---------------------------------------
+* x86 版 Darwin に対応しました。動作確認は Darwin 8.0.1 for x86 にて
+  行ないました。x86 版 Mac OS X でも、動作するものと思われます。
+  また、Solaris 10 での動作確認も行ないました。
+
+IJG R6b with x86SIMD V1.01 (2006-01-26)
+---------------------------------------
+* jsimdgcc.c を使ったとき、NEED_SHORT_EXTERNAL_NAMES が定義されていると
+  正常にリンクできないバグを修正しました。
+* 圧縮側のコードの一部(jcsammmx.asm, jcsamss2.asm, jcqnt3dn.asm)にやや
+  冗長な箇所があったので、これを修正しました。
+
+IJG R6b with x86SIMD V1.0 (2006-01-10)
+--------------------------------------
+* 最初の公開版。
+
+
+[EOF]
diff --git a/simd_filelist.ja.txt b/simd_filelist.ja.txt
new file mode 100644 (file)
index 0000000..4bee431
--- /dev/null
@@ -0,0 +1,261 @@
+Independent JPEG Group's JPEG software release 6b
+  with x86 SIMD extension for IJG JPEG library version 1.02
+    == FILELIST ==
+-----------------------------------------------------------
+
+■このファイルは
+
+  このファイルでは、SIMD 版 IJG JPEG software の配布アーカイブに収められて
+  いる各ファイルの概要を解説します。なお、ここでは x86 SIMD extension で
+  新たに追加されたファイルと、x86 SIMD extension で改変が加えられて機能／
+  役割がオリジナル版とは異なるファイルのみを解説します。それ以外のファイル
+  については、オリジナル版の filelist.doc (英文) を参照してください。
+
+    ¢£¥Þ¥Ë¥å¥¢¥ëÎà
+    ¢£configure ¥¹¥¯¥ê¥×¥È¤Ë´Ø·¸¤¹¤ë¥Õ¥¡¥¤¥ë
+    ¢£Microsoft Visual C++ 6.0 ÍÑ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë·² (vc6proj/)
+    ¢£ÆÃÄê¤Î¥³¥ó¥Ñ¥¤¥éÍѤΠjconfig.h ¤È Makefile
+    ¢£¥½¡¼¥¹¥Õ¥¡¥¤¥ë
+      ¡ü x86 SIMD extension ¤Ç¿·¤¿¤ËÄɲ䵤줿¥Õ¥¡¥¤¥ë
+      ¡ü x86 SIMD extension ¤Ç²þÊѤ¬²Ã¤¨¤é¤ì¤¿¥Õ¥¡¥¤¥ë
+    ¢£Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg (altui/)
+    ¢£SIMD ÈǤǤϻȤï¤ì¤Ê¤¤¥Õ¥¡¥¤¥ë·² (unused/)
+
+
+¢£¥Þ¥Ë¥å¥¢¥ëÎà
+
+  °Ê²¼¤Î SIMD ÈǤΥޥ˥奢¥ë¤Î¾¤Ë¡¢¥ª¥ê¥¸¥Ê¥ëÈǤαÑʸ¥Þ¥Ë¥å¥¢¥ë¤â
+  ¤½¤Î¤Þ¤Þ¼ýÏ¿¤·¤Æ¤¢¤ê¤Þ¤¹¡£Ê»¤»¤Æ»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+  simd_README.ja.txt    ¼ç¥Þ¥Ë¥å¥¢¥ë(x86 SIMD extension ¤Î³µÍפʤÉ)
+  simd_filelist.ja.txt  SIMD ÈÇ IJG JPEG software ¤Î¥Õ¥¡¥¤¥ë¥ê¥¹¥È
+  simd_install.ja.txt   SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý
+  simd_internal.ja.txt  SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¡¢SIMD ³ÈÄ¥Éôʬ¤Î¾ÜºÙ
+  simd_cdjpeg.ja.txt    SIMD ÈǤΠcjpeg / djpeg ¤Ë¸ÇÍ­¤Îµ¡Ç½¤Î²òÀâ
+  simd_changes.ja.txt   SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î²þÈÇÍúÎò
+
+
+¢£ configure ¥¹¥¯¥ê¥×¥È¤Ë´Ø·¸¤¹¤ë¥Õ¥¡¥¤¥ë
+
+  UNIX ´Ä¶­¤Ç configure ¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ë¾ì¹ç¤ËɬÍפȤʤë¥Õ¥¡¥¤¥ë·²
+  ¤Ç¤¹¡£Èó UNIX ´Ä¶­¤Ç¤Ï¡¢ºï½ü¤·¤Æ¤â¤«¤Þ¤¤¤Þ¤»¤ó¡£
+
+  configure     configure ¥¹¥¯¥ê¥×¥ÈËÜÂÎ
+  config.ver    configure ¤«¤é¸Æ¤Ó½Ð¤µ¤ì¤ë¥¹¥¯¥ê¥×¥È¤Ç¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î
+                ¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤òÄêµÁ¤·¤Æ¤¤¤Þ¤¹
+  ltmain.sh     configure ¤Î¥µ¥Ý¡¼¥È¥¹¥¯¥ê¥×¥È (from GNU libtool)
+  config.guess          ¡·
+  config.sub            ¡·
+  install-sh    install ¥³¥Þ¥ó¥É¤¬¤Ê¤¤¾ì¹ç¤ÎÂåÍÑ¥¹¥¯¥ê¥×¥È
+  nasm_lt.sh    nasm ¤ò GNU libtool ¤Ç»È¤¦¾ì¹ç¤Î¥é¥Ã¥Ñ¡¦¥¹¥¯¥ê¥×¥È
+  jconfig.cfg   configure ¤¬À¸À®¤¹¤ë jconfig.h ¤Î¿÷·¿¥Õ¥¡¥¤¥ë
+  makefile.cfg  configure ¤¬À¸À®¤¹¤ë Makefile ¤Î¿÷·¿¥Õ¥¡¥¤¥ë
+  configure.in  configure ¥¹¥¯¥ê¥×¥È¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë (for GNU autoconf)
+  aclocal.m4            ¡·
+  libjpeg.spec  RPM ¤òºÎÍѤ·¤Æ¤¤¤ë linux ¥·¥¹¥Æ¥à¸þ¤±¤Î spec ¥Õ¥¡¥¤¥ë
+
+
+¢£ Microsoft Visual C++ 6.0 ÍÑ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë·² (vc6proj/)
+
+  ¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤ò»È¤¦¾ì¹ç¤Ï¡¢¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤¬¤¢¤ë°ì¤Ä¾å¤Î¥Õ¥©¥ë¥À¤Ë
+  ¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤ò¤¹¤Ù¤Æ°Üư¤·¤Æ¤¯¤À¤µ¤¤¡£¾Ü¤·¤¯¤Ï¡¢simd_install.ja.txt
+  ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+  Visual C++ 6.0 °Ê¹ß¤ÎÅý¹ç³«È¯´Ä¶­(DevStudio)¤Î¾ì¹ç¤Ï¡¢¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë
+  ¤òÊÑ´¹(¥¤¥ó¥Ý¡¼¥È)¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£
+
+  vc6proj/libjpeg.dsw   ¥×¥í¥¸¥§¥¯¥È¡¦¥ï¡¼¥¯¥¹¥Ú¡¼¥¹
+  vc6proj/makecfg.dsp   libjpeg.dsp (libjpeg.lib) ¤Î¥Ó¥ë¥É¤ËɬÍפÊ
+                        ÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òºî¤ë
+  vc6proj/libjpeg.dsp   libjpeg.lib ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë
+  vc6proj/cjpeg.dsp     cjpeg.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë
+  vc6proj/djpeg.dsp     djpeg.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë
+  vc6proj/jpegtran.dsp  jpegtran.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë
+  vc6proj/rdjpgcom.dsp  rdjpgcom.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë
+  vc6proj/wrjpgcom.dsp  wrjpgcom.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë
+  vc6proj/apptest.dsp   cjpeg, djpeg, jpegtran ¤Îưºî¥Æ¥¹¥È(make test)
+  vc6proj/jconfig.h     VC++ ÍѤΠjconfig.h (jconfig.vc ¤ÈƱ¤¸¤â¤Î)
+
+
+¢£ÆÃÄê¤Î¥³¥ó¥Ñ¥¤¥éÍѤΠjconfig.h ¤È Makefile
+
+  ¾Ü¤·¤¯¤Ï¡¢simd_install.ja.txt ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤Ë¤Ï¡¢
+  SIMD ²½¤Ëȼ¤Ã¤Æ¿·¤¿¤ËÄɲ䵤줿¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Ë´Ø¤¹¤ëµ­½Ò¤¬Äɲäµ¤ì¤Æ
+  ¤¤¤Þ¤¹¤«¤é¡¢¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤Î jconfig.* ¤È Makefile.* ¤Ï¡¢
+  ¤³¤Î SIMD ÈǤǤϻÈÍѤǤ­¤Þ¤»¤ó¡£
+
+  jconfig.bc5       Borland C++ Compiler 5.5 (win32) ÍѤΠjconfig.h
+  makefile.bc5      Borland C++ Compiler 5.5 (win32) ÍѤΠMakefile
+  jconfig.dj        DJGPP v2.0 ÍѤΠjconfig.h
+  makefile.dj       DJGPP v2.0 ÍѤΠMakefile
+  jconfig.mgw       MinGW ÍѤΠjconfig.h
+  makefile.mgw      MinGW ÍѤΠMakefile (ÀÅŪ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®)
+  makefile.mgwdll   MinGW ÍѤΠMakefile (DLL ÈÇ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®)
+  jconfig.vc        VC++ ÍѤΠjconfig.h
+  makefile.vc       VC++ ÍѤΠMakefile (ÀÅŪ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®)
+  makefile.vcdll    VC++ ÍѤΠMakefile (DLLÈÇ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®)
+  jconfig.linux     linux ÍѤΠjconfig.h (Ãí¡§configure ¤Î»ÈÍѤò¿ä¾©)
+  makefile.linux    linux ÍѤΠMakefile (Ãí¡§configure ¤Î»ÈÍѤò¿ä¾©)
+
+  °Ê²¼¤Î¥Õ¥¡¥¤¥ë¤Ï¡¢¾åµ­°Ê³°¤Î¥³¥ó¥Ñ¥¤¥é¤ËÂбþ¤¹¤ë jconfig.h ¤È Makefile
+  ¤ò¿·¤¿¤ËºîÀ®¤¹¤ë¾ì¹ç¤Î¿÷·¿¤È¤Ê¤ë¥Õ¥¡¥¤¥ë¤Ç¤¹¡£¥ª¥ê¥¸¥Ê¥ëÈǤˤ⸺ߤ·¤Þ¤¹
+  ¤¬¡¢SIMD ²½¤Ëȼ¤¦Êѹ¹¤¬»Ü¤µ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  ckconfig.c        jconfig.h ¤òÀ¸À®¤¹¤ë¥×¥í¥°¥é¥à
+  makefile.ansi     Makefile ¤Î¿÷·¿¥Õ¥¡¥¤¥ë (ANSI ¥³¥ó¥Ñ¥¤¥éÍÑ)
+  makefile.unix     Makefile ¤Î¿÷·¿¥Õ¥¡¥¤¥ë (Èó ANSI ¥³¥ó¥Ñ¥¤¥éÍÑ)
+
+
+■ソースファイル
+
+  ● x86 SIMD extension で新たに追加されたファイル
+
+  jccolmmx.asm  RGB->YCbCr 色空間変換 (MMX)
+  jccolss2.asm  RGB->YCbCr 色空間変換 (SSE2)
+  jcsammmx.asm  ダウンサンプリング (MMX)
+  jcsamss2.asm  ダウンサンプリング (SSE2)
+
+  jdcolmmx.asm  YCbCr->RGB 色空間変換 (MMX)
+  jdcolss2.asm  YCbCr->RGB 色空間変換 (SSE2)
+  jdsammmx.asm  アップサンプリング (MMX)
+  jdsamss2.asm  アップサンプリング (SSE2)
+  jdmermmx.asm  色空間変換／アップサンプリング統合 (MMX)
+  jdmerss2.asm  色空間変換／アップサンプリング統合 (SSE2)
+
+  jcqntint.asm  データ変換と量子化 (非SIMD, 整数)
+  jcqntflt.asm  データ変換と量子化 (非SIMD, 浮動小数点)
+  jcqntmmx.asm  データ変換と量子化 (MMX, 整数)
+  jcqnts2i.asm  データ変換と量子化 (SSE2, 整数)
+  jcqnt3dn.asm  データ変換と量子化 (3DNow! & MMX, 浮動小数点)
+  jcqntsse.asm  データ変換と量子化 (SSE & MMX, 浮動小数点)
+  jcqnts2f.asm  データ変換と量子化 (SSE & SSE2, 浮動小数点)
+
+  jfdctint.asm  高精度整数(順方向)DCT (非SIMD)
+  jfmmxint.asm  高精度整数(順方向)DCT (MMX)
+  jfss2int.asm  高精度整数(順方向)DCT (SSE2)
+  jfdctfst.asm  高速整数(順方向)DCT (非SIMD)
+  jfmmxfst.asm  高速整数(順方向)DCT (MMX)
+  jfss2fst.asm  高速整数(順方向)DCT (SSE2)
+  jfdctflt.asm  浮動小数点(順方向)DCT (非SIMD)
+  jf3dnflt.asm  浮動小数点(順方向)DCT (3DNow!)
+  jfsseflt.asm  浮動小数点(順方向)DCT (SSE)
+
+  jidctint.asm  高精度整数(逆方向)DCT (非SIMD)
+  jimmxint.asm  高精度整数(逆方向)DCT (MMX)
+  jiss2int.asm  高精度整数(逆方向)DCT (SSE2)
+  jidctfst.asm  高速整数(逆方向)DCT (非SIMD)
+  jimmxfst.asm  高速整数(逆方向)DCT (MMX)
+  jiss2fst.asm  高速整数(逆方向)DCT (SSE2)
+  jidctflt.asm  浮動小数点(逆方向)DCT (非SIMD)
+  ji3dnflt.asm  浮動小数点(逆方向)DCT (3DNow! & MMX)
+  jisseflt.asm  浮動小数点(逆方向)DCT (SSE & MMX)
+  jiss2flt.asm  浮動小数点(逆方向)DCT (SSE & SSE2)
+  jidctred.asm  縮小展開用(逆方向)DCT (非SIMD)
+  jimmxred.asm  縮小展開用(逆方向)DCT (MMX)
+  jiss2red.asm  縮小展開用(逆方向)DCT (SSE2)
+
+  jsimdcpu.asm  CPU の SIMD サポートチェック
+  jsimddjg.asm  OS の SIMD サポートチェック (for DJGPP V.2)
+  jsimdw32.asm  OS の SIMD サポートチェック (for Win32)
+  jsimdgcc.c    OS の SIMD サポートチェック (for gcc)
+
+  makecfg.c     ¥¢¥»¥ó¥Ö¥ê¸À¸ìÍÑÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òºîÀ®¤¹¤ë
+
+  jsimdext.inc  ¥¢¥»¥ó¥Ö¥ê¸À¸ì¥½¡¼¥¹ÍѤζ¦Ḁ̈إåÀ¥Õ¥¡¥¤¥ë
+  jdct.inc      DCT ´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë
+  jcolsamp.inc  ¿§¶õ´ÖÊÑ´¹¡¿¥µ¥ó¥×¥ê¥ó¥°´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë
+
+  jcolsamp.h    ¿§¶õ´ÖÊÑ´¹¡¿¥µ¥ó¥×¥ê¥ó¥°´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë
+                ¥ª¥ê¥¸¥Ê¥ëÈǤˤϸºß¤·¤Ê¤¤£Ã¸À¸ì¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë¤Ç¡¢
+                SIMD ²½¤ÇƳÆþ¤µ¤ì¤¿´Ø¿ô¤ÎÀë¸À¤¬µ­½Ò¤µ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  jpegdll.def   DLL ÈÇ JPEG Library ÍѤδؿô¥¨¥¯¥¹¥Ý¡¼¥ÈÄêµÁ¥Õ¥¡¥¤¥ë
+  jpegdll.rc    DLL ÈÇ JPEG Library ÍѤΥС¼¥¸¥ç¥ó¥ê¥½¡¼¥¹ÄêµÁ¥Õ¥¡¥¤¥ë
+                ¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤Ï¡¢IJG JPEG Library ¤ò¤½¤Î¤Þ¤Þ DLL ¤Ë
+                ¤¹¤ë¾ì¹ç¤Ë»ÈÍѤ·¤Þ¤¹(makefile.vcdll/makefile.mgwdll)¡£
+
+  ¡ü x86 SIMD extension ¤Ç²þÊѤ¬²Ã¤¨¤é¤ì¤¿¥Õ¥¡¥¤¥ë
+
+  Êѹ¹ÆâÍÆ¤Ë¤Ä¤¤¤ÆÆÃ¤Ëµ­½Ò¤Î¤Ê¤¤¥Õ¥¡¥¤¥ë¤Ë¤Ï¡¢SIMD ²½¤Ç¿·¤¿¤ËƳÆþ¤µ¤ì¤¿
+  ´Ø¿ô¤ÎÀë¸À¤ä¤½¤Î¸Æ¤Ó½Ð¤·µ­½Ò¡¢SIMD ²½¤Ë´ØÏ¢¤¹¤ë¥Þ¥¯¥íÄêµÁ¤Ê¤É¤¬ÄɲÃ
+  ¤µ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  jpeglib.h     JPEG ¥é¥¤¥Ö¥é¥ê¤Î¥á¥¤¥ó¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë
+  jpegint.h     JPEG ¥é¥¤¥Ö¥é¥ê¤ÎÆâÉôÍѥإåÀ¥Õ¥¡¥¤¥ë
+  jmorecfg.h    JPEG ¥é¥¤¥Ö¥é¥ê¤Î¾ÜºÙÀßÄê¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë
+
+  jdct.h        DCT ´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë
+                SIMD ½èÍý¤ËŬ¤¹¤ë¤è¤¦¤Ë¡¢´ö¤Ä¤«¤ÎÊÑ¿ô¤Î·¿¤âÊѹ¹¤µ¤ì¤Æ
+                ¤¤¤Þ¤¹¡£
+
+  jcdctmgr.c    ½çÊý¸þDCT¤Î¥Þ¥Í¡¼¥¸¥á¥ó¥È½èÍý
+  jddctmgr.c    µÕÊý¸þDCT¤Î¥Þ¥Í¡¼¥¸¥á¥ó¥È½èÍý
+
+  jccolor.c     RGB->YCbCr ¿§¶õ´ÖÊÑ´¹ (ÈóSIMD)
+  jdcolor.c     YCbCr->RGB ¿§¶õ´ÖÊÑ´¹ (ÈóSIMD)
+  jdmerge.c     ¿§¶õ´ÖÊÑ´¹¡¿¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°Åý¹ç (ÈóSIMD)
+  jcsample.c    ¥À¥¦¥ó¥µ¥ó¥×¥ê¥ó¥° (ÈóSIMD)
+  jdsample.c    ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥° (ÈóSIMD)
+                jdsample.c ¤Ë¤Ï¡¢ÈóSIMDÈǤΠh1v2 ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°´Ø¿ô
+                (h1v2_upsample, h1v2_fancy_upsample) ¤âÄɲäµ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  jdhuff.h      ¥Ï¥Õ¥Þ¥óÉ乿¥Ç¥³¡¼¥É½èÍý (¥Ø¥Ã¥À)
+  jdhuff.c      ¥Ï¥Õ¥Þ¥óÉ乿¥Ç¥³¡¼¥É½èÍý (¥·¡¼¥±¥ó¥·¥ã¥ë)
+  jdphuff.c     ¥Ï¥Õ¥Þ¥óÉ乿¥Ç¥³¡¼¥É½èÍý (¥×¥í¥°¥ì¥Ã¥·¥Ö)
+                ¤³¤ì¤é¤Î£³¤Ä¤Î¥Õ¥¡¥¤¥ë¤ÎÊѹ¹ÅÀ¤Ï¡¢SIMD ²½¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£
+                ¥Ç¥³¡¼¥É½èÍý¤ÎÊýË¡¤ò¸úΨ²½¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£
+
+  jdcoefct.c    DCT¥Ç¡¼¥¿¥Ö¥í¥Ã¥¯¤Î¥Þ¥Í¡¼¥¸¥á¥ó¥È
+                SIMD ²½¤È¤Ï´Ø·¸¤Ê¤¯¡¢°ìÉô¤Î¥³¡¼¥É¤ò¸úΨ²½¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£
+
+  jcomapi.c     °µ½Ì/Ÿ³« ¶¦ÄÌ API ´Ø¿ôÄêµÁ
+                SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯´Ø¿ô¤Ê¤É¤¬Äɲäµ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  jmemmgr.c     JPEG library ÍÑ¥á¥â¥ê¥Þ¥Í¡¼¥¸¥ã (¥á¥¤¥ó)
+                SIMD ²½¤Ëȼ¤¤¡¢16¥Ð¥¤¥È¥¢¥É¥ì¥¹¶­³¦¤Ë¹ç¤Ã¤¿¥á¥â¥êÎΰè¤ò
+                ¾ï¤Ë16¥Ð¥¤¥Èñ°Ì¤Ç³ÎÊݤ¹¤ë¤è¤¦¤ËÊѹ¹¤·¤Æ¤¢¤ê¤Þ¤¹¡£
+
+  cjpeg.c       JPEG °µ½ÌÍÑ ¥³¥Þ¥ó¥É¥é¥¤¥ó¡¦¥æ¡¼¥Æ¥£¥ê¥Æ¥£
+  djpeg.c       JPEG Å¸³«ÍÑ ¥³¥Þ¥ó¥É¥é¥¤¥ó¡¦¥æ¡¼¥Æ¥£¥ê¥Æ¥£
+                -v ¥ª¥×¥·¥ç¥ó¤Ç¤Î SIMD ´ØÏ¢¾ðÊó¤Îɽ¼¨¤ä¡¢-nosimd ¤Ê¤É¤Î
+                ¥ª¥×¥·¥ç¥ó¥¹¥¤¥Ã¥Á¤¬Äɲäµ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  rdbmp.c       BMP ¥Õ¥¡¥¤¥ëÆÉ¤ß¹þ¤ß¥â¥¸¥å¡¼¥ë
+  wrbmp.c       BMP ¥Õ¥¡¥¤¥ë½ñ¤­½Ð¤·¥â¥¸¥å¡¼¥ë
+                SIMD ²½¤È¤Ï´Ø·¸¤Ê¤¯¡¢°ìÉô¤Î¥³¡¼¥É¤ò¸úΨ²½¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£
+
+  rdgif.c       GIF ¥Õ¥¡¥¤¥ëÆÉ¤ß¹þ¤ß¥â¥¸¥å¡¼¥ë(version 6a)
+  wrgif.c       GIF ¥Õ¥¡¥¤¥ë½ñ¤­½Ð¤·¥â¥¸¥å¡¼¥ë(version 6a)
+                Unisys ¤Î GIF (LZW) ÆÃµö¼º¸ú¤Ëȼ¤¤¡¢version 6a ¤Ç¥µ¥Ý¡¼¥È
+                ¤µ¤ì¤Æ¤¤¤¿ cjpeg/djpeg ¤Ç¤Î GIF ¤ÎÆþ½ÐÎϤòÉü³è¤µ¤»¤Þ¤·¤¿¡£
+                ¤³¤Î GIF ¥â¥¸¥å¡¼¥ë¤Ï version 6a ¤Î¤â¤Î¤òήÍѤ·¤Æ¤¤¤Þ¤¹¡£
+                version 6b ¤Î GIF ¥â¥¸¥å¡¼¥ë¤Ï unused/ ¤Ë¤¢¤ê¤Þ¤¹¡£
+
+
+¢£Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg (altui/)
+
+  altui/ ¤Ë¤¢¤ë¥Õ¥¡¥¤¥ë¤Ï¡¢¸µ¡¹ jpegaltui.v6b.tar.gz ¤È¤¤¤¦¥Õ¥¡¥¤¥ë̾¤Ç
+  Ê̤ËÇÛÉÛ¤µ¤ì¤Æ¤¤¤¿¤â¤Î¤Ç¤¹¡£¤³¤Î SIMD ÈǤǤϡ¢£±¥Õ¥¡¥¤¥ëÈÇ cjpeg/djpeg
+  ¤ÈƱÍͤΠSIMD Âбþ²½¤Ë´Ø¤¹¤ë½¤Àµ¤È¡¢Borland C++ / Microsoft VC++ ¤Ë
+  ¤ª¤¤¤Æ¥ï¡¼¥ë¥É¥«¡¼¥ÉŸ³«½èÍý¤òÍ­¸ú²½¤¹¤ë¤¿¤á¤Î¥³¡¼¥É¤ò½ñ¤­²Ã¤¨¤¿¤â¤Î¤Ç¤¹¡£
+
+  altui/cjpeg.c         Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg
+  altui/djpeg.c         Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠdjpeg
+  altui/README.alt      jpegaltui.v6b.tar.gz ¤ËƱº­¤µ¤ì¤Æ¤¤¤¿ README
+  altui/usage.alt       Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈÇ cjpeg/djpeg ¤Î¥Þ¥Ë¥å¥¢¥ë(º¹Ê¬)
+
+
+¢£SIMD ÈǤǤϻȤï¤ì¤Ê¤¤¥Õ¥¡¥¤¥ë·² (unused/)
+
+  unused/ ¤Ë¤¢¤ë¥Õ¥¡¥¤¥ë¤Ï¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG software ¤Ë¼ýÏ¿¤µ¤ì¤Æ
+  ¤¤¤¿¤¬¡¢¤³¤Î SIMD ÈǤǤϻȤï¤ì¤Ê¤¤/»È¤¨¤Ê¤¤¥Õ¥¡¥¤¥ë·²¤¬¼ý¤á¤é¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  unused/j?dct???.c     ¥ª¥ê¥¸¥Ê¥ë¤Î£Ã¸À¸ìÈÇ DCT ´Ø¿ô
+  unused/jmem*.*        ¥·¥¹¥Æ¥à°Í¸¥á¥â¥ê¥Þ¥Í¡¼¥¸¥ã(for MS-DOS/Macintosh)
+  unused/??gif.c        version 6b ¤Î GIF ¥â¥¸¥å¡¼¥ë
+  unused/jconfig.*      ¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤Î jconfig.*
+  unused/mak*.*         ¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤Î Makefile.*
+
+
+
+[EOF]
diff --git a/simd_install.ja.txt b/simd_install.ja.txt
new file mode 100644 (file)
index 0000000..ef8f825
--- /dev/null
@@ -0,0 +1,436 @@
+Independent JPEG Group's JPEG software release 6b
+  with x86 SIMD extension for IJG JPEG library version 1.02
+    == INSTALL ==
+-----------------------------------------------------------
+
+¢£¤³¤Î¥Õ¥¡¥¤¥ë¤Ï
+
+  ¤³¤Î¥Õ¥¡¥¤¥ë¤Ç¤Ï¡¢SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý¤ò²òÀâ
+  ¤·¤Þ¤¹¡£¾¡¼ê¤Ê¤¬¤é¡¢¤³¤³¤Ç¤Ï¥ª¥ê¥¸¥Ê¥ëÈǤΠlibjpeg ¥é¥¤¥Ö¥é¥ê¤Î°·¤¤Êý
+  (¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý¡¿¥×¥í¥°¥é¥à¤ÎÃæ¤Ç¤Î»È¤¤Êý)¤ò¤¢¤ëÄøÅÙ¿´ÆÀ¤Æ¤¤¤ë¤È¤¤¤¦
+  ¿Í¤òÂоݤˤµ¤»¤Æ¤¤¤¿¤À¤­¤Þ¤¹¡£¥ª¥ê¥¸¥Ê¥ëÈǤλÈÍÑË¡¤Ë¤Ä¤¤¤Æ¤Ï¡¢
+  install.doc (±Ñʸ) ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+    ■アセンブラ NASM の入手／インストール
+    ■コンパイルの仕方
+      ● Microsoft Visual C++ 6.0 以降の統合開発環境(DevStudio)の場合
+      ● jconfig.h と Makefile を選択してコンパイルする
+      ● UNIX 環境で configure スクリプトを使う
+    ■複数ファイル対応版の cjpeg/djpeg (altui/)
+    ■コードサイズを減らすには
+    ■特定の SIMD 命令を使用しないようにするには
+
+
+¢£¥¢¥»¥ó¥Ö¥é NASM ¤ÎÆþ¼ê¡¿¥¤¥ó¥¹¥È¡¼¥ë
+
+  ¤³¤Î x86 SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î SIMD ³ÈÄ¥Éôʬ¤Ï¡¢¤½¤Î¤Û¤È¤ó¤É¤¬
+  x86 ¤Î¥¢¥»¥ó¥Ö¥ê¸À¸ì¤Ç½ñ¤«¤ì¤Æ¤¤¤Þ¤¹¡£¤³¤Î¥¢¥»¥ó¥Ö¥ê¸À¸ì¥½¡¼¥¹¥³¡¼¥É¤ò
+  ¥¢¥»¥ó¥Ö¥ë¤¹¤ë¤Ë¤Ï¡¢NASM (Netwide Assembler) ¤È¤¤¤¦¥¢¥»¥ó¥Ö¥é¤¬É¬ÍפǤ¹¡£
+  Microsoft ¤Î MASM ¤ä¤½¤Î¸ß´¹¥¢¥»¥ó¥Ö¥é¤Ç¤Ï°·¤¨¤Þ¤»¤ó¤Î¤ÇÃí°Õ¤·¤Æ¤¯¤À¤µ¤¤¡£
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  NASM (Netwide Assembler) ¤Ï¡¢¸ø¼°¥µ¥¤¥È http://nasm.sourceforge.net/ or
+  http://sourceforge.net/project/showfiles.php?group_id=6208 ¤«¤é¥À¥¦¥ó
+  ¥í¡¼¥É¤Ç¤­¤Þ¤¹¡£Ver.0.98.25 °Ê¹ß¤ÎÈæ³ÓŪ¿·¤·¤¤¥Ð¡¼¥¸¥ç¥ó¤Î¤â¤Î¤¬É¬ÍפǤ¹¡£
+  ¸½»þÅÀ(2006/02)¤Ç¤ÎºÇ¿·ÈǤϠVer.0.98.39 ¤Ç¤¹¡£
+
+  ¤Ê¤ª¡¢x86 ÈǤΠDarwin ¤ä Mac OS X ¤Ê¤É¤Ç»ÈÍѤ¹¤ë¾ì¹ç¤Ï¡¢¸½»þÅÀ¤Ç¤Ï¤Þ¤À
+  Àµ¼°¤Ë¥ê¥ê¡¼¥¹¤µ¤ì¤Æ¤¤¤Ê¤¤ Ver.0.98.40 °Ê¹ß¤Î¥Ð¡¼¥¸¥ç¥ó¤Î¤â¤Î¤¬É¬ÍפǤ¹¡£
+  ¸½»þÅÀ¤Ç¤Ï¡¢Ver.0.98.40 ¤Ï¾åµ­¤Î¸ø¼°¥µ¥¤¥È¤Î CVS ¥ê¥Ý¥¸¥È¥ê¤«¤é¥½¡¼¥¹
+  ¥³¡¼¥É¤ò¥À¥¦¥ó¥í¡¼¥É¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¡¿¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£
+
+  Windows ·Ï¤Î¾ì¹ç¤Ï¡¢nasm-0.XX.YY-win32.zip (XX.YY ¤Ë¤Ï¥Ð¡¼¥¸¥ç¥óÈֹ椬Æþ¤ë)
+  ¤È¤¤¤¦Ì¾Á°¤Î¥Õ¥¡¥¤¥ë¤ò¥À¥¦¥ó¥í¡¼¥É¤·¤Æ¡¢¤½¤ì¤Ë´Þ¤Þ¤ì¤ë nasmw.exe ¤ò
+  £Ã¥³¥ó¥Ñ¥¤¥é¤Î¼Â¹Ô¥Õ¥¡¥¤¥ë·²¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ë¾ì½ê¤Ë¥³¥Ô¡¼¤·¤Þ¤¹¡£
+
+  ³Æ¼ï PC-UNIX ¤Î¾ì¹ç¤Ï¡¢OS ¤ÎÇÛÉÛ¸µ¤Ë¤Æ°Ü¿¢ºÑ¤ß¥Ñ¥Ã¥±¡¼¥¸¤¬Ä󶡤µ¤ì¤Æ¤¤¤ë
+  ¾ì¹ç¤¬¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢¤Þ¤ººÇ½é¤Ë¤½¤Á¤é¤ò³Îǧ¤·¤Æ¤ß¤Æ¤¯¤À¤µ¤¤¡£¤½¤ì¤¬¤Ê¤¤
+  ¾ì¹ç¤Ï¡¢¾åµ­¸ø¼°¥µ¥¤¥È¤«¤é¥½¡¼¥¹¥³¡¼¥É(nasm-0.XX.YY.tar.gz)¤ò¥À¥¦¥ó¥í¡¼¥É
+  ¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¡¿¥¤¥ó¥¹¥È¡¼¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£linux ¤Î¾ì¹ç¤Ç rpm ¥Ñ¥Ã¥±¡¼¥¸¤ò
+  °·¤¨¤ë¥·¥¹¥Æ¥à¤Î¾ì¹ç¤Ï¡¢¾åµ­¸ø¼°¥µ¥¤¥È¤Ë¤Æ rpm ¥Ð¥¤¥Ê¥ê¥Ñ¥Ã¥±¡¼¥¸¤âÆþ¼ê
+  ¤Ç¤­¤Þ¤¹¡£
+
+  Ãí°ÕÅÀ¤È¤·¤Æ¡¢YASM (http://www.tortall.net/projects/yasm/) ¤Ï»È¤ï¤Ê¤¤¤Ç
+  ¤¯¤À¤µ¤¤¡£YASM ¤Ï NASM ¸ß´¹¤òëð¤Ã¤Æ¤¤¤Þ¤¹¤¬¡¢¸½ºß¤Î¥Ð¡¼¥¸¥ç¥ó(0.4.0)¤Ç¤Ï
+  ¤Þ¤À¸ß´¹ÅÙ¤¬Ä㤤¾å¤Ë¥Ð¥°¤¬¤¢¤ë(¥¢¥É¥ì¥¹·×»»¤¬¤Þ¤Ã¤¿¤¯¥Ç¥¿¥é¥á¤Ê¥³¡¼¥É¤ò
+  À¸À®¤·¤Æ¤¤¤ë)¤¿¤á¡¢ÅöÊý¤Î¥Æ¥¹¥È¤Ç¤Ï YASM ¤Ç¥¢¥»¥ó¥Ö¥ë¤·¤¿¥³¡¼¥É¤Ï¤Þ¤Ã¤¿¤¯
+  Æ°¤­¤Þ¤»¤ó¤Ç¤·¤¿¡£¡ÊÃí¡§¤³¤Î x86 SIMD extension for IJG JPEG library
+  ¤Ç¤Ï¡¢¤¿¤È¤¨ YASM ¤òÍѤ¤¤Æ¤â AMD64 ¤Î 64bit Âбþ¤Ë¤Ï¤Ê¤ê¤Þ¤»¤ó¡£¡Ë
+
+
+¢£¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý
+
+  ¤Û¤È¤ó¤É¤ÎÉôʬ¤Ç¥ª¥ê¥¸¥Ê¥ëÈǤÈÊѤï¤ê¤¢¤ê¤Þ¤»¤ó¤Î¤Ç¡¢°Ê²¼¤ÎÀâÌÀ¤Ç¤Ï¡¢
+  ¤³¤Î SIMD ³ÈÄ¥ÈÇ¤ËÆÃÍ­¤ÎÃí°ÕÅÀ¤òÃæ¿´¤Ë½Ò¤Ù¤Þ¤¹¡£
+
+
+  ¡ü Microsoft Visual C++ 6.0 °Ê¹ß¤ÎÅý¹ç³«È¯´Ä¶­(DevStudio)¤Î¾ì¹ç
+
+    ¤³¤ÎÇÛÉÛ¥»¥Ã¥È¤Ë¤Ï¡¢Microsoft Visual C++ 6.0 ÍÑ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë
+    ¤¬ÉÕ°¤·¤Æ¤¤¤Þ¤¹¡£V6.0 °Ê¹ß¤Î VC++ ¤Î¾ì¹ç¤Ï¡¢VC++ 6.0 ¤Î¥Õ¥¡¥¤¥ë¤ò
+    ÊÑ´¹(¥¤¥ó¥Ý¡¼¥È)¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£Microsoft Visual C++ 2005 Express
+    Edition ¤Ë¤Æ¡¢¥¤¥ó¥Ý¡¼¥È¡¿¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£
+
+    ÊýË¡¤Ï¡¢¤Þ¤º vc6proj ¥Õ¥©¥ë¥À¤ÎÃæ¤Ë¤¢¤ë¥Õ¥¡¥¤¥ë¤ò¡¢¥½¡¼¥¹¥Õ¥¡¥¤¥ë·²
+    (*.c) ¤Î¤¢¤ë¥Õ¥©¥ë¥À¤Ë¤¹¤Ù¤Æ°Üư¤·¤Þ¤¹¡£¤½¤·¤Æ libjpeg.dsw ¤ò³«¤¤¤Æ
+    (¤â¤·¤¯¤Ï¥¤¥ó¥Ý¡¼¥È¤·¤Æ)¡¢¥á¥Ë¥å¡¼¤Î ¥Ó¥ë¥É¢ª¥Ð¥Ã¥Á¥Ó¥ë¥É ¤Ç¤¹¤Ù¤Æ¤Ë
+    ¥Á¥§¥Ã¥¯¤òÆþ¤ì¤Æ¥Ó¥ë¥É¤¹¤ì¤Ð£Ï£Ë¤Ç¤¹¡£
+
+    ¥ï¡¼¥¯¥¹¥Ú¡¼¥¹ libjpeg.dsw ¤Ë¤Ï¡¢°Ê²¼¤Î¥×¥í¥¸¥§¥¯¥È¤¬¼ý¤á¤é¤ì¤Æ¤¤¤Þ¤¹¡£
+
+    ¢¡ makecfg.dsp
+
+      ¤³¤Î¥×¥í¥¸¥§¥¯¥È¤Ï¡¢makecfg.c ¤ò¥³¥ó¥Ñ¥¤¥ë¡¿¥ê¥ó¥¯¤·¤Æ¡¢¥«¥¹¥¿¥à
+      ¥Ó¥ë¥É¥¹¥Æ¥Ã¥×¤Ç¤½¤ì(makecfg.exe)¤ò¼Â¹Ô¤·¡¢libjpeg.dsp ¤Î¥Ó¥ë¥É¤Ë
+      É¬ÍפÊÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òºî¤ëƯ¤­¤ò¤·¤Æ¤¤¤Þ¤¹¡£
+
+      ¤³¤ì¤Ï¡¢libjpeg.dsp ¤¬¥³¥ó¥Ñ¥¤¥ë¤µ¤ì¤ëÁ°¤Ëɬ¤º¥³¥ó¥Ñ¥¤¥ë(¼Â¹Ô)
+      ¤µ¤ì¤Ê¤±¤ì¤Ð¤Ê¤ê¤Þ¤»¤ó¡£
+
+    ¢¡ libjpeg.dsp
+
+      JPEG ¥é¥¤¥Ö¥é¥ê libjpeg.lib ¤ò¥Ó¥ë¥É¤·¤Þ¤¹¡£makecfg.dsp ¤¬½ÐÎϤ¹¤ë
+      ÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤¬É¬ÍפǤ¹¡£
+
+    ¢¡ cjpeg.dsp, djpeg.dsp, jpegtran.dsp, rdjpgcom.dsp, wrjpgcom.dsp
+
+      IJG JPEG library ¤ËÉÕ°¤·¤Æ¤¤¤ë¥µ¥ó¥×¥ë¡¦¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤Ç¤¹¡£
+      cjpeg ¤Ï JPEG °µ½Ì¤ò¡¢djpeg ¤Ï JPEG Å¸³«¤ò¡¢jpegtran ¤Ï JPEG
+      ¥Õ¥¡¥¤¥ë¤ÎÊÑ´¹¤ò¤·¤Þ¤¹¡£rdjpgcom ¤È wrjpgcom ¤Ï¡¢JPEG¥Õ¥¡¥¤¥ë
+      Ãæ¤Î¥Æ¥­¥¹¥È¥³¥á¥ó¥Èʸ¤òÁàºî¤·¤Þ¤¹¡£
+
+    ¢¡ apptest.dsp
+
+      ¤Ç¤­¤¢¤¬¤Ã¤¿ cjpeg, djpeg, jpegtran ¤ËÂФ·¤Æ´Êñ¤Êưºî¥Æ¥¹¥È¤ò
+      ¤·¤Þ¤¹(make test ¤ÈƱÅù)¡£¤Þ¤º¡¢¤³¤ì¤é¤Î¥½¥Õ¥È¤Ç¼ÂºÝ¤Ë²èÁü¥Õ¥¡¥¤¥ë
+      ¤òÊÑ´¹¤µ¤»¡¢¤½¤Î½ÐÎϤò fc.exe ¤ÇÈæ³Ó¤·¤Þ¤¹¡£"FC: Áê°ãÅÀ¤Ï¸¡½Ð
+      ¤µ¤ì¤Þ¤»¤ó¤Ç¤·¤¿" ¤È¤¤¤¦¥á¥Ã¥»¡¼¥¸¤¬£¶²óɽ¼¨¤µ¤ì¤ì¤Ð¡¢¥×¥í¥°¥é¥à¤Ï
+      Àµ¤·¤¯Æ°¤¤¤Æ¤¤¤Þ¤¹¡£
+
+    JPEG ¥é¥¤¥Ö¥é¥ê libjpeg.lib ¤ò¾¤Î¥½¥Õ¥È¤Ë¥ê¥ó¥¯¤¹¤ë¤¿¤á¤Ë libjpeg.dsp
+    ¤ò¾¤Î¥ï¡¼¥¯¥¹¥Ú¡¼¥¹¡¿¥½¥ê¥å¡¼¥·¥ç¥ó¤Ë´Þ¤á¤ë¾ì¹ç¤Ï¡¢makecfg.dsp ¤â
+    É¬¤ºÆ±¤¸¥ï¡¼¥¯¥¹¥Ú¡¼¥¹¡¿¥½¥ê¥å¡¼¥·¥ç¥ó¤Ë´Þ¤á¡¢¾ï¤Ë makecfg.dsp ¤¬
+    libjpeg.dsp ¤è¤ê¤âÀè¤Ë¥Ó¥ë¥É¤µ¤ì¤ë¤è¤¦¤Ë¡¢°Í¸´Ø·¸¤òÀßÄꤷ¤Æ¤¯¤À¤µ¤¤¡£
+    ¤³¤ì¤Ï¡¢Àè¤Ë½Ò¤Ù¤¿¤è¤¦¤Ë¡¢libjpeg.dsp ¤Ï makecfg.dsp ¤¬½ÐÎϤ¹¤ëÀßÄê
+    ¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òɬÍפȤ·¤Æ¤¤¤ë¤¿¤á¤Ç¤¹¡£
+
+    ÀßÄêË¡¤Ï¡¢(VC++ 6.0¤Î¾ì¹ç) ¥á¥Ë¥å¡¼¤Î ¥×¥í¥¸¥§¥¯¥È ¢ª °Í¸´Ø·¸ ¤Ç
+    libjpeg ¤òÁªÂò¤·¡¢²¼¤Î°ìÍ÷¤ÎÃæ¤Î makecfg ¤Ë¥Á¥§¥Ã¥¯¤òÆþ¤ì¤Þ¤¹¡£
+
+
+  ¡ü jconfig.h ¤È Makefile ¤òÁªÂò¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë
+
+    Windows ·Ï¤Î¥³¥ó¥Ñ¥¤¥é¤Î¾ì¹ç¤Ç¡¢¥³¥Þ¥ó¥É¥é¥¤¥ó¤«¤é¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¾ì¹ç
+    ¤Ï¡¢¤³¤ÎÊýË¡¤ò¤È¤Ã¤Æ¤¯¤À¤µ¤¤¡£
+
+    ÊýË¡¤Ï¡¢ÉÕ°¤Î jconfig.* ¤È Makefile.* ¤ÎÃæ¤«¤éŬÀڤʤâ¤Î¤ò¤½¤ì¤¾¤ì
+    °ì¤Ä¤º¤ÄÁª¤Ó¡¢¤½¤ì¤¾¤ì jconfig.h ¤È Makefile ¤Ë̾Á°¤òÊѤ¨¤Þ¤¹¡£
+    ¤½¤·¤Æ¡¢¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Î¤¢¤ë¥Ç¥£¥ì¥¯¥È¥ê(¥Õ¥©¥ë¥À)¤ò¥«¥ì¥ó¥È¥Ç¥£¥ì
+    ¥¯¥È¥ê¤Ë¤·¤Æ¡¢¥³¥Þ¥ó¥É¥é¥¤¥ó¤Ç make (VC++ ¤Î¾ì¹ç¤Ï nmake)¤È¥¿¥¤¥×
+    ¤¹¤ì¤Ð£Ï£Ë¤Ç¤¹¡£¸å¤Ï¼«Æ°Åª¤Ë¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤È¡¢¥µ¥ó¥×¥ë¥¢¥×¥ê
+    ¥±¡¼¥·¥ç¥ó(cjpeg ¤ä djpeg ¤Ê¤É)¤¬¥³¥ó¥Ñ¥¤¥ë¤µ¤ì¤Þ¤¹¡£
+
+    ÉÕ°¤·¤Æ¤¤¤ë jconfig.* ¤È Makefile.* ¤Ï¡¢°Ê²¼¤Î½èÍý·Ï¤ËÂбþ¤·¤Æ¤¤¤Þ¤¹¡£
+    ¤Ê¤ª¡¢¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤·¤Æ¤¤¤ë jconfig.* ¤È Makefile.* ¤Ï¡¢¤³¤Î
+    SIMD ÂбþÈǤǤϻÈÍѤǤ­¤Þ¤»¤ó(¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Î¹½À®¤¬ÊѤï¤Ã¤Æ¤¤¤ë¤¿¤á)¡£
+
+    ・ jconfig.dj  & makefile.dj  -- DJGPP v2.0 or later
+    ・ jconfig.bc5 & makefile.bc5 -- Borland C++ Compiler 5.5 (win32)
+    ・ jconfig.mgw & makefile.mgw, makefile.mgwdll -- MinGW
+    ・ jconfig.vc  & makefile.vc , makefile.vcdll  -- VC++ 6.0 or later
+    ・ jconfig.linux & makefile.linux -- linux
+
+    ¤³¤ì¤é°Ê³°¤Î¥³¥ó¥Ñ¥¤¥é¤ËÂбþ¤µ¤»¤ë¾ì¹ç¤Ï¡¢install.doc ¤Ë½ñ¤¤¤Æ¤¢¤ë
+    ¤è¤¦¤Ë¡¢¤Þ¤º ckconfig.c ¤ò¥³¥ó¥Ñ¥¤¥ë¡¿¼Â¹Ô¤·¤Æ jconfig.h ¤òÀ¸À®¤·¡¢
+    makefile.unix ¤â¤·¤¯¤Ï makefile.ansi ¤ò¼êºî¶È¤ÇÊÔ½¸¤·¤Æ Makefile ¤ò
+    ºîÀ®¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+    °Ê²¼¡¢³Æ¥³¥ó¥Ñ¥¤¥é¤Ë¸ÇÍ­¤ÎÃí°ÕÅÀ¤ò½Ò¤Ù¤Þ¤¹¡£
+
+    ¢¡ jconfig.dj  & makefile.dj  -- DJGPP v2.0 or later.
+
+      ²Äǽ¤Ê¸Â¤ê¿·¤·¤¤¥Ð¡¼¥¸¥ç¥ó¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£DJGPP 2.03 + gcc 3.4.4
+      + binutils 2.16.1 ¤È¤¤¤¦ÁȤ߹ç¤ï¤»¤Çưºî¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£
+
+      makefile.dj ¤Ë¤Ï¡¢gcc 3.4.x ¸þ¤±¤Î¥³¥ó¥Ñ¥¤¥ë¥ª¥×¥·¥ç¥ó¤¬½ñ¤«¤ì¤Æ
+      ¤¤¤Þ¤¹¤Î¤Ç¡¢gcc 3.4.x °Ê³°¤Ç¤¦¤Þ¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Ê¤¤¾ì¹ç¤Ï¡¢Å¬Åö¤Ë
+      ½ñ¤­Ä¾¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£
+
+    ¢¡ jconfig.bc5 & makefile.bc5 -- Borland C++ Compiler 5.5 (win32)
+
+      ²¿¤ÎÌäÂê¤â¤Ê¤¯¥³¥ó¥Ñ¥¤¥ë¤Ï¤Ç¤­¤ë¤Ï¤º¤Ç¤¹¤¬¡¢¤³¤Î BCC 5.5 ¤Î¾ì¹ç¤Ï¡¢
+      ¤Û¤È¤ó¤É¤Î¥±¡¼¥¹¤Ç SSE/SSE2 ¤¬Æ°ºî¤·¤Þ¤»¤ó¡£Íýͳ¤Ï¡¢BCC 5.5 ¤ËÉÕ°¤Î
+      ¥ê¥ó¥«(ilink32.exe)¤¬¸Å¤¯¡¢SSEÄê¿ô¤ò16¥Ð¥¤¥È¶­³¦¥¢¥É¥ì¥¹¤ËÇÛÃÖ¤¹¤ë
+      ¤³¤È¤¬¤Ç¤­¤Ê¤¤¤¿¤á¤Ç¤¹¡£
+
+      ¤½¤Î¤¿¤á¡¢BCC 5.5 ¤ò»È¤¦¾ì¹ç¤Ï SSE/SSE2 ¤Î¥µ¥Ý¡¼¥È¤òºï½ü(¸å½Ò)¤·¤Æ
+      ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤³¤È¤ò¤ª´«¤á¤¤¤¿¤·¤Þ¤¹¡£
+
+    ¢¡ jconfig.mgw & makefile.mgw, makefile.mgwdll -- MinGW
+
+      ²Äǽ¤Ê¸Â¤ê¿·¤·¤¤¥Ð¡¼¥¸¥ç¥ó¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£gcc 3.4.4 + binutils
+      2.16.91 ¤È¤¤¤¦ÁȤ߹ç¤ï¤»¤Çưºî¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£
+
+      ¤Þ¤¿¡¢GNU make ¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ë¤³¤È¤âɬÍפǤ¹¡£MinGW ¤Î¾ì¹ç¡¢
+      (make.exe ¤Ç¤Ï¤Ê¤¯) mingw32-make.exe ¤È¤¤¤¦Ì¾Á°¤Ç¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì
+      ¤Þ¤¹¤Î¤Ç¡¢make ¤Ç¤Ï¤Ê¤¯ mingw32-make ¤È¥¿¥¤¥×¤¹¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£
+
+      ¤³¤ì¤é¤Î makefile ¤Ë¤Ï¡¢gcc 3.4.x ¸þ¤±¤Î¥³¥ó¥Ñ¥¤¥ë¥ª¥×¥·¥ç¥ó¤¬½ñ¤«
+      ¤ì¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢gcc 3.4.x °Ê³°¤Ç¤¦¤Þ¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Ê¤¤¾ì¹ç¤Ï¡¢
+      Å¬Åö¤Ë½ñ¤­Ä¾¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£
+
+      makefile.mgw ¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤òÀÅۥ饤¥Ö¥é¥ê(libjpeg.a)¤Ë
+      ¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î libjpeg.a ¤ò¥ê¥ó¥¯¤·¤¿ cjpeg, djpeg,
+      jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£makefile.mgwdll ¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤ò
+      DLL(jpeg62.dll)¤Ë¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î jpeg62.dll ¤ò¥ê¥ó¥¯¤·¤¿
+      cjpeg, djpeg, jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£
+
+    ¢¡ jconfig.vc  & makefile.vc , makefile.vcdll  -- VC++ 6.0 or later
+
+      VC++ 6.0 °Ê¹ß¤Ê¤é²¿¤ÎÌäÂê¤â¤Ê¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë¤Ï¤º¤Ç¤¹¡£¥Õ¥ê¡¼¤Ç
+      ¸ø³«¤µ¤ì¤Æ¤¤¤ë Microsoft Visual C++ Toolkit 2003 ¤Ç¤â¡¢nmake.exe
+      ¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ì¤Ð¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Þ¤¹¡£
+
+      makefile.vc ¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤òÀÅۥ饤¥Ö¥é¥ê(libjpeg.lib)¤Ë
+      ¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î libjpeg.lib ¤ò¥ê¥ó¥¯¤·¤¿ cjpeg, djpeg,
+      jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£makefile.vcdll ¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤ò
+      DLL(jpeg62.dll)¤Ë¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î jpeg62.dll ¤ò¥ê¥ó¥¯¤·¤¿
+      cjpeg, djpeg, jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£
+
+    ¢¡ jconfig.linux & makefile.linux -- linux
+
+      ³Æ¼ï¤Î linux ¤ËÂбþ¤·¤Þ¤¹¤¬¡¢linux ¤Î¾ì¹ç¤Ï configure ¥¹¥¯¥ê¥×¥È¤ò
+      »È¤¦¤³¤È¤ò¶¯¤¯¤ª´«¤á¤¤¤¿¤·¤Þ¤¹¡£
+
+
+  ¡ü UNIX ´Ä¶­¤Ç configure ¥¹¥¯¥ê¥×¥È¤ò»È¤¦
+
+    ³Æ¼ï¤Î PC-UNIX ¤Î¾ì¹ç¤Ê¤É¡¢¥·¥§¥ë¡¦¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ë¤³¤È¤Î¤Ç¤­¤ë
+    ´Ä¶­¤Î¾ì¹ç¤Ï¡¢configure ¥¹¥¯¥ê¥×¥È¤ò»È¤¦¤È´Êñ¤Ë¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Þ¤¹¡£
+
+      $ ./configure --enable-shared --enable-static
+      $ make
+      $ make test      (←動作テスト；必要に応じて)
+      # make install
+
+    ¤³¤ÎÊýË¡¤Ç¤Ï¡¢°Ê²¼¤Î¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ç¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë¤³¤È¤ò³Îǧ¤·¤Æ
+    ¤¤¤Þ¤¹¡£¤³¤ì°Ê³°¤Î UNIX ·Ï OS ¤Ç¤â¥³¥ó¥Ñ¥¤¥ë¤Ï²Äǽ¤À¤È»×¤ï¤ì¤Þ¤¹¤¬¡¢
+    ¼ã´³¤Î¼êľ¤·¤¬É¬Íפˤʤë¾ì¹ç¤â¤¢¤ë¤«¤È»×¤¤¤Þ¤¹¡£
+
+    ・Vine Linux 2.6r4 (gcc 2.95.3) および Vine Linux 3.2 (gcc 3.4.4)
+    ・Fedora core 3 (gcc 3.4.2) および Fedora core 4 (gcc 4.0.0)
+    ・FreeBSD 5.4 (gcc 3.4.2) および FreeBSD 6.0 (gcc 3.4.4)
+    ・NetBSD 2.0 (gcc 3.3.3) および NetBSD 3.0 (gcc 3.3.3)
+    ・Solaris 10 1/06 for x64/x86 (i386-pc-solaris2.10)
+    ・Darwin 8.0.1 for x86 (i386-apple-darwin8.0.1; gcc 3.3)
+    ・MinGW & MSYS (gcc 3.4.4)
+    ・cygwin (gcc 3.4.4)
+
+    Æ±º­¤Î configure ¥¹¥¯¥ê¥×¥È¤Ï GNU autoconf ¤ÇÀ¸À®¤µ¤ì¤¿¤â¤Î¤Ç¤¹¡£
+    ¤³¤Î configure ¥¹¥¯¥ê¥×¥È¤Ï¡¢°ìÈÌŪ¤Ê configure ¥¹¥¯¥ê¥×¥È¤¬Ç§¼±¤¹¤ë
+    ¥¹¥¤¥Ã¥Á¥ª¥×¥·¥ç¥ó¤Î¾¤Ë¡¢°Ê²¼¤Î¥¹¥¤¥Ã¥Á¤òǧ¼±¤·¤Þ¤¹¡£
+
+    ¡û --enable-shared / --enable-static
+
+      --enable-shared ¤ò»ØÄꤹ¤ë¤È¡¢GNU libtool ¤ò»È¤Ã¤Æ¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò
+      ºîÀ®¤·¤Þ¤¹¡£--enable-static ¤ò»ØÄꤹ¤ë¤È¡¢Æ±¤¸¤¯ GNU libtool ¤ò
+      »È¤Ã¤ÆÀÅۥ饤¥Ö¥é¥ê¤òºîÀ®¤·¤Þ¤¹¡£Î¾Êý¤ò»ØÄꤹ¤ë¤È¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê
+      ¤ÈÀÅۥ饤¥Ö¥é¥ê¤ÎξÊý¤òºîÀ®¤·¤Þ¤¹¡£µÕ¤Ë¤³¤ÎξÊý¤È¤â»ØÄꤷ¤Ê¤¤¤È¡¢
+      GNU libtool ¤ò»È¤ï¤º¤ËÀÅۥ饤¥Ö¥é¥ê¤Î¤ß¤òºîÀ®¤·¤Þ¤¹¡£
+
+      ¥·¥¹¥Æ¥à¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë¥é¥¤¥Ö¥é¥ê¤òºîÀ®¤¹¤ë¾ì¹ç¤ÏξÊý¤ò»ØÄꤹ¤ë
+      É¬Íפ¬¤¢¤ë¤Ç¤·¤ç¤¦¡£
+
+    ○ --disable-mmx / --disable-3dnow / --disable-sse / --disable-sse2
+
+      特定の SIMD 命令セットのサポート(コード)を削除してコンパイルします。
+
+    ¡û --enable-uchar-boolean
+
+      ¥½¡¼¥¹¥³¡¼¥ÉÃæ¤Î bool ·¿¤ÎÄêµÁ¤ò int ·¿¤Ç¤Ï¤Ê¤¯ unsigned char ·¿¤Ë
+      Êѹ¹¤·¤Þ¤¹¡£¤³¤Î¥ª¥×¥·¥ç¥ó¤ÏÄ̾ï¤Ï(ÆÃ¤ËɬÍפǤʤ¤¸Â¤ê)»ÈÍѤ·¤Ê¤¤¤Ç
+      ¤¯¤À¤µ¤¤¡£¤³¤ì¤Ï¡¢MinGW ¤Ê¤É¤Î Windows ¾å¤Î UNIX ´Ä¶­¤Ë¤Æ¡¢bool ·¿
+      ¤ÎÄêµÁ¤ò Windows ¤Î½¬´·¤Ë¹ç¤ï¤»¤ë¤¿¤á¤ËÍѰդµ¤ì¤Æ¤¤¤ë¤â¤Î¤Ç¤¹¡£
+
+    °Ê²¼¡¢³Æ¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë¸ÇÍ­¤ÎÃí°ÕÅÀ¤ò½Ò¤Ù¤Þ¤¹¡£
+
+    ¢¡ ³Æ¼ï linux ¥Ç¥£¥¹¥È¥ê¥Ó¥å¡¼¥·¥ç¥ó
+
+      ¤è¤Û¤É¸Å¤¤¤â¤Î¤Ç¤Ê¤¤¸Â¤ê¡¢¾åµ­¤Î¼ê½ç¤Ç²¿¤ÎÌäÂê¤â¤Ê¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë
+      ¤Ï¤º¤Ç¤¹¡£
+
+      ¾åµ­¤Î¼ê½ç¤Ç¶¦Í­¥é¥¤¥Ö¥é¥ê¤òºîÀ®¤·¤¿¾ì¹ç¡¢¤½¤Î¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤Ï
+      62.1.0 (¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.62.1.0) ¤È¤Ê¤ê¤Þ¤¹¡£¤³¤ì¤ÏÁ°½Ò¤·¤¿
+      ¤È¤ª¤ê¡¢¥ª¥ê¥¸¥Ê¥ëÈÇ(¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.62.0.0)¤È¥Ð¥¤¥Ê¥ê¥ì¥Ù¥ë
+      ¤Ç¤Î¾å°Ì¸ß´¹À­¤¬¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢¥ª¥ê¥¸¥Ê¥ëÈǤȤ½¤Î¤Þ¤ÞÃÖ¤­´¹¤¨¤ë¤³¤È
+      ¤¬¤Ç¤­¤Þ¤¹¡£
+
+      rpm ¤ò»È¤Ã¤¿¥Ñ¥Ã¥±¡¼¥¸´ÉÍý¤òºÎÍѤ·¤Æ¤¤¤ë¥Ç¥£¥¹¥È¥ê¥Ó¥å¡¼¥·¥ç¥ó¤Ç¤Ï¡¢
+      Æ±º­¤Î spec ¥Õ¥¡¥¤¥ë (libjpeg.spec) ¤â¤´ÍøÍѤ¤¤¿¤À¤±¤Þ¤¹¡£¤³¤ì¤Ï¡¢
+      Vine Linux 3.2 ¤ª¤è¤Ó Fedora core 4 ¤Ç¤Îưºî¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£
+
+    ¢¡ xBSD ¥Õ¥¡¥ß¥ê (FreeBSD/NetBSD/OpenBSD)
+
+      ºÇ¶á¤Î¥Ð¡¼¥¸¥ç¥ó¤Î FreeBSD ¤È NetBSD ¤Ë´Ø¤·¤Æ¤Ï¡¢¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë
+      ¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£OpenBSD ¤Ë´Ø¤·¤Æ¤âÌäÂê¤Ï¤Ê¤¤¤È»×¤¤¤Þ¤¹¡£
+      ¤¿¤À¡¢¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥©¡¼¥Þ¥Ã¥È¤Ë a.out ¤ò»È¤Ã¤¿¸Å¤¤¤â¤Î¤Ë´Ø¤·¤Æ¤Ï¡¢
+      ¥Õ¥¡¥¤¥ë¥Õ¥©¡¼¥Þ¥Ã¥È¤Î¼ïÎà¤â°ì±þ configure ¥¹¥¯¥ê¥×¥È¤Ë¤Æ¸¡½Ð¤Ç¤­¤ë
+      ¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¤¬¡¢Æ°ºî¤Ï̤³Îǧ¤Ç¤¹¡£
+
+      FreeBSD ¤Î¾ì¹ç¡¢¾åµ­¤Î¼ê½ç¤Ç¶¦Í­¥é¥¤¥Ö¥é¥ê¤òºîÀ®¤·¤¿¾ì¹ç¡¢¤½¤Î
+      ¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤Ï 9 (¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.9) ¤È¤Ê¤ê¤Þ¤¹¡£¤³¤ì¤Ï¡¢
+      ports collection ¤ÎÃæ¤Ë¤¢¤ë¸ø¼°ÈǤΥС¼¥¸¥ç¥óÈÖ¹æ¤Ë½à¤¸¤¿¤â¤Î¤Ç¡¢
+      (¥Ð¥¤¥Ê¥ê¾å°Ì¸ß´¹¤Ê¤Î¤Ç)¸ø¼°ÈǤȤ½¤Î¤Þ¤ÞÃÖ¤­´¹¤¨¤ë¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£
+      NetBSD/OpenBSD ¤Î¾ì¹ç¤Î¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤Ï 62.1.0 (¥Õ¥¡¥¤¥ë̾¡§
+      libjpeg.so.62.1.0) ¤Ë¤Ê¤ê¤Þ¤¹(¥Ð¥¤¥Ê¥ê¾å°Ì¸ß´¹)¡£
+
+    ¢¡ Solaris 10
+
+      ºî¼Ô¤Î¥Æ¥¹¥È¤Ç¤Ï¡¢Àµ¾ï¤Ë¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Æ¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤¬¥Ð¥¤¥Ê¥ê
+      ¸ß´¹¤Ë¤Ê¤ë¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹(¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.62.1.0)¡£
+      ¤Ç¤¹¤¬¡¢¤³¤Î SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Ï AMD64 ¤Ë¤Ï¸½»þÅÀ¤Ç¤ÏÂбþ
+      ¤·¤Æ¤¤¤Ê¤¤¤¿¤á¡¢32bitÈǤΥ饤¥Ö¥é¥ê¤·¤«ºî¤ì¤Þ¤»¤ó¡£
+
+    ¢¡ Darwin for x86
+
+      ºî¼Ô¤Î¥Æ¥¹¥È¤Ç¤Ï¡¢Àµ¾ï¤Ë¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Æ¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤¬¥Ð¥¤¥Ê¥ê
+      ¸ß´¹¤Ë¤Ê¤ë¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹(¥Õ¥¡¥¤¥ë̾¡§libjpeg.62.1.0.dylib)¡£
+      x86 ÈÇ Mac OS X ¤Ç¤â¡¢¥³¥ó¥Ñ¥¤¥ë¤µ¤¨¤Ç¤­¤ì¤Ðưºî¤¹¤ë¤â¤Î¤È»×¤ï¤ì¤Þ¤¹¡£
+
+      ¸½»þÅÀ¤Ç¤Ï¡¢¥¢¥»¥ó¥Ö¥é nasm ¤Î Darwin ¤Ø¤Î¥µ¥Ý¡¼¥È¤¬½½Ê¬¤Ç¤Ê¤¤¤¿¤á¡¢
+      ¤ä¤ä¥È¥ê¥Ã¥­¡¼¤Ê¥³¡¼¥É¤Ç Darwin / Mac OS X ¤ËÂбþ¤µ¤»¤Æ¤¤¤Þ¤¹¤¬¡¢
+      Æ°ºî¤Ë¤Ï¤Þ¤Ã¤¿¤¯ÌäÂê¤Ê¤¤¤Ï¤º¤Ç¤¹¡£
+
+    ¢¡ MinGW & MSYS (gcc 3.4.4)
+
+      MinGW ¤Î¾ì¹ç¤Ï¡¢ÉÕ°¤Î makefile.mgw / makefile.mgwdll ¤ò»È¤¦¤³¤È¤ò
+      ¿ä¾©¤·¤Þ¤¹¤¬¡¢MSYS ¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ì¤Ð configure ¥¹¥¯¥ê¥×¥È
+      ¤â»È¤¨¤Þ¤¹¡£¤³¤Î¾ì¹ç¤Ï¡¢configure ¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ëºÝ¤Ë
+      --enable-uchar-boolean ¤òɬ¤º»ØÄꤷ¤Æ¤¯¤À¤µ¤¤¡£¤³¤¦¤¹¤ë¤³¤È¤Ç¡¢
+      Â¾¤Î Windows ·Ï¤Î½èÍý·Ï(VC++¤Ê¤É)¤¬½ÐÎϤ¹¤ë¥³¡¼¥É¤È¥Ð¥¤¥Ê¥ê¸ß´¹¤Ë
+      ¤Ê¤ê¤Þ¤¹¡£
+
+    ¢¡ cygwin (gcc 3.4.4)
+
+      ¤Þ¤ºÃí°Õ¤¹¤Ù¤­ÅÀ¤Ï¡¢cygwin ¤Î¾ì¹ç¡¢cygwin ¤«¤é¸ø¼°¤Ë¥ê¥ê¡¼¥¹¤µ¤ì¤Æ
+      ¤¤¤ë DLL (cygjpeg-62.dll) ¤È¤Ï¥Ð¥¤¥Ê¥ê¸ß´¹¤Ë¤Ï¤Ê¤ê¤Þ¤»¤ó¡£¤³¤ì¤Ï¡¢
+      ¸ø¼°ÈǤΥХ¤¥Ê¥ê¤Ë¤Ï lossless jpeg patch (ljpeg-6b.tar.gz) ¤È¤¤¤¦
+      ½¤Àµ¥Ñ¥Ã¥Á¤¬´Þ¤Þ¤ì¤Æ¤¤¤ë¤¿¤á¤Ç¡¢ÅöSIMDÈǤËÂФ·¤Æ¤³¤Î¥Ñ¥Ã¥Á¤òŬÍÑ
+      ¤¹¤ë¤³¤È¤Ïº¤Æñ¤À¤«¤é¤Ç¤¹¡£
+
+      ¥Ð¥¤¥Ê¥ê¸ß´¹¤Ç¤Ï¤Ê¤¤¤¿¤á¡¢¸ø¼°¥ê¥ê¡¼¥¹ÈǤΠDLL ¤ò¤³¤ÎSIMDÈǤÇÃÖ¤­
+      ´¹¤¨¤ë¤³¤È¤Ï¤Ç¤­¤Þ¤»¤ó¡£¤½¤Î¤¿¤áÅöSIMDÈǤΠDLL ¤Ï cygjpeg-162.dll
+      ¤È¤¤¤¦Ì¾Á°¤Ë¤Ê¤ë¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¡£¤³¤ì¤ò¥·¥¹¥Æ¥à¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë
+      ¤³¤È¤â¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤ÎSIMDÈǤΠDLL ¤òÍøÍѤ¹¤ë¤Ë¤Ï¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤ò
+      »ÈÍѤ·¤Æ¤¤¤ë¥½¥Õ¥È¤òºÆ¥³¥ó¥Ñ¥¤¥ë¡¿ºÆ¥ê¥ó¥¯¤¹¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£
+
+      ¤Ê¤ª¡¢¤³¤Î DLL ¤Ë¤Ä¤±¤é¤ì¤ë¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤òÊѤ¨¤¿¤±¤ì¤Ð¡¢configure
+      ¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ëÁ°¤Ë config.ver ¤ÎÆâÍÆ¤òÊѹ¹¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+
+¢£Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg (altui/)
+
+  ¥µ¥ó¥×¥ë¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤Î cjpeg ¤È djpeg ¤Ë¤Ä¤¤¤Æ¤Ç¤¹¤¬¡¢¥Ç¥Õ¥©¥ë¥È¤Î
+  ¾õÂ֤ǥ³¥ó¥Ñ¥¤¥ë¤µ¤ì¤ë¤â¤Î(¾¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë·²¤ÈƱ¤¸¾ì½ê¤Ë¤¢¤ë cjpeg.c
+  ¤È djpeg.c) ¤Ï¡¢°ìÅ٤˰ì¤Ä¤Î¥Õ¥¡¥¤¥ë¤·¤«ÊÑ´¹¤Ç¤­¤Ê¤¤¤â¤Î¤Ç¤¹¡£¤Ä¤Þ¤ê¡¢
+  ÆþÎÏ¥Õ¥¡¥¤¥ë¤Ï¥³¥Þ¥ó¥É¥é¥¤¥ó¾å¤Ë°ì¤Ä¤·¤«»ØÄê¤Ç¤­¤º¡¢½ÐÎϤÏɸ½à½ÐÎϤ«
+  -outfile ¥ª¥×¥·¥ç¥ó¤Ç»ØÄꤷ¤¿¥Õ¥¡¥¤¥ë¤Ë½ñ¤­½Ð¤µ¤ì¤Þ¤¹¡£³Æ¼ï¤Î UNIX ·Ï
+  OS ¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ìÍøÍѤµ¤ì¤Æ¤¤¤ë cjpeg/djpeg ¤Ï¤³¤Î¥¿¥¤¥×¤Î¤â¤Î¤Ç¤¹¡£
+
+  °ìÊý¡¢IJG ¤«¤é¸ø¼°¤Ë¥ê¥ê¡¼¥¹¤µ¤ì¤Æ¤¤¤ë MS-DOS ÈǤΠcjpeg/djpeg
+  (ftp://ftp.simtel.net/.2/simtelnet/msdos/graphics/jpeg6_b.zip) ¤Ç¤Ï¡¢
+  Ê£¿ô¤ÎÆþÎÏ¥Õ¥¡¥¤¥ë¤ò»ØÄê¤Ç¤­¡¢½ÐÎÏ¤ÏÆþÎÏ¥Õ¥¡¥¤¥ë¤ÈƱ¤¸¾ì½ê¤Ë¼«Æ°Åª¤Ë
+  ºî¤é¤ì¤Þ¤¹¡£¤³¤Î¥¿¥¤¥×¤Î cjpeg/djpeg ¤òºî¤ê¤¿¤±¤ì¤Ð¡¢altui/ ¤ÎÃæ¤Ë¤¢¤ë
+  cjpeg.c ¤È djpeg.c ¤ò¡¢¸µ¤«¤é¤¢¤ë(£±¥Õ¥¡¥¤¥ëÈǤÎ) cjpeg.c / djpeg.c ¤È
+  Æþ¤ìÂØ¤¨¤Æ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£¤³¤Î altui ÈǤΠcjpeg.c / djpeg.c ¤Ï¡¢
+  ¸µ¡¹ jpegaltui.v6b.tar.gz ¤È¤¤¤¦¥Õ¥¡¥¤¥ë̾¤ÇÇÛÉÛ¤µ¤ì¤Æ¤¤¤¿¤â¤Î¤ËÂФ·¤Æ
+  SIMD Âбþ²½¤Ë´Ø¤¹¤ë½¤Àµ¤ò²Ã¤¨¤¿¤â¤Î¤Ç¤¹¡£
+
+  ¤³¤ÎÊ£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg ¤ò Windows ·Ï¤Ê¤É¤ÎÈó UNIX ´Ä¶­¤Ç
+  »ÈÍѤ¹¤ë¾ì¹ç¡¢ÆþÎÏ¥Õ¥¡¥¤¥ë̾¤ò¥ï¥¤¥ë¥É¥«¡¼¥É¤Ç»ØÄê¤Ç¤­¤ë¤è¤¦¤Ë¤¹¤ë¤Ë¤Ï¡¢
+  ³Æ¥³¥ó¥Ñ¥¤¥é¤Ë¸ÇÍ­¤ÎÆÃÊ̤ÊÀßÄ꤬ɬÍפˤʤë¾ì¹ç¤¬¤¢¤ê¤Þ¤¹¡£¤Ê¤¼¤Ê¤é¡¢
+  MS-DOS·Ï¡¿Windows·Ï¤Î´Ä¶­¤Ç¤Ï°ìÈ̤ˡ¢¥ï¥¤¥ë¥É¥«¡¼¥É¤ÎŸ³«½èÍý¤Ï¥³¥ó¥Ñ¥¤¥é
+  ¤ËÉÕ°¤Î¥¹¥¿¡¼¥È¥¢¥Ã¥×¥³¡¼¥ÉÆâ¤Ç¹Ô¤Ê¤ï¤ì¤ë¤¿¤á¤Ç¤¹¡£
+
+  MinGW ¤ä DJGPP V.2 ¤Ê¤É¤Î¾ì¹ç¤Ï¡¢¥ï¥¤¥ë¥É¥«¡¼¥É¤ÎŸ³«½èÍý¤ÏºÇ½é¤«¤éÍ­¸ú
+  ¤Ë¤Ê¤Ã¤Æ¤¤¤ë¤¿¤á¡¢ÆÃÊ̤ʤ³¤È¤ò¤·¤Ê¤¯¤Æ¤â¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë»ØÄê
+  ¤Ï¤Ç¤­¤Þ¤¹¡£Microsoft Visual C++ ¤ä Borland C++ ¤Î¾ì¹ç¤Ï¡¢ÉáÄÌ¡¢¥ï¥¤¥ë¥É
+  ¥«¡¼¥ÉŸ³«¤òÍ­¸ú²½¤¹¤ë¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤Ç¤¢¤ë setargv.obj ¤ä
+  wildargs.obj ¤ò EXE ¥Õ¥¡¥¤¥ë¤Î¥ê¥ó¥¯»þ¤Ë¾¤Î¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤È°ì½ï¤Ë
+  ¥ê¥ó¥¯¤¹¤ë¤³¤È¤Ç¡¢¥ï¥¤¥ë¥É¥«¡¼¥ÉŸ³«¤òÍ­¸ú²½¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤Î SIMD ÈÇ
+  cjpeg/djpeg ¤Î¾ì¹ç¤Ï¡¢setargv.obj ¤ä wildargs.obj ¤ÎÆâÍÆ¤ËÁêÅö¤¹¤ë¥³¡¼¥É¤ò
+  cjpeg.c/djpeg.c ¤ËľÀܽñ¤­¹þ¤ó¤Ç¤¢¤ë¤¿¤á¡¢¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤ä Makefile
+  ¤Ë¾åµ­¤Î¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤òÄɲ䷤ʤ¯¤Æ¤â¡¢¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë
+  »ØÄ꤬¤Ç¤­¤ë¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¡£¤³¤ì¤é°Ê³°¤Î£Ã¥³¥ó¥Ñ¥¤¥é¤ò»ÈÍѤ·¤¿¾ì¹ç¤Ç¡¢
+  ¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë»ØÄ꤬¤Ç¤­¤Ê¤¤¾ì¹ç¤Ï¡¢¥³¥ó¥Ñ¥¤¥é¤Î¥Þ¥Ë¥å¥¢¥ë
+  ¤ò»²¾È¤·¤Æ¡¢¥ï¥¤¥ë¥É¥«¡¼¥ÉŸ³«¤òÍ­¸ú²½¤¹¤ëÀßÄê¤Ç¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+  °ìÊý¡¢linux ¤Ê¤É¤Î UNIX ´Ä¶­¤Ç¤Ï¡¢¥ï¥¤¥ë¥É¥«¡¼¥É¤ÎŸ³«½èÍý¤Ï¥×¥í¥°¥é¥à¤¬
+  µ¯Æ°¤µ¤ì¤ëÁ°¤Ë¥³¥Þ¥ó¥É¥·¥§¥ë¤Ë¤è¤Ã¤Æ¹Ô¤Ê¤ï¤ì¤ë¤¿¤á¡¢¥³¥ó¥Ñ¥¤¥é¤ÎÀßÄê¤Ê¤É
+  ¤ÏɬÍפ¢¤ê¤Þ¤»¤ó¡£¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë̾»ØÄê¤Ï¾ï¤Ë»È¤¨¤Þ¤¹¡£
+
+
+■コードサイズを減らすには
+
+  SIMD ¥³¡¼¥É¤òÉղä·¤¿¤¿¤á¡¢¤½¤Îʬ¤À¤±¥³¡¼¥É¥µ¥¤¥º¤¬Áý¤¨¤Æ¤¤¤Þ¤¹¡£¤Ç¤¹¤¬¡¢
+  JPEG library ¤ò°Ê²¼¤Î¤è¤¦¤Ê¥Ç¥Õ¥©¥ë¥È¤Î¾õÂ֤Ǿï¤Ë»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ê¤é¤Ð¡¢
+  jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ(¥Þ¥¯¥í)¤ò°Ê²¼¤Î¤è¤¦¤ËÊѹ¹¤·¤Æ¥³¥ó¥Ñ¥¤¥ë
+  ¤¹¤ë¤³¤È¤Ç¡¢»ÈÍѤµ¤ì¤Ê¤¤¥³¡¼¥É¤ò½ü³°¤¹¤ë¤³¤È¤¬¤Ç¤­¡¢¥³¡¼¥É¥µ¥¤¥º¤ò¸º¤é¤¹
+  ¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£
+
+  ¡û cinfo.dct_method ¤ÎÃͤòÊѹ¹¤·¤Æ¤¤¤Ê¤¤¾ì¹ç
+
+    ¤³¤ÎÊÑ¿ô¤Ï DCT±é»»¤ÎÊýË¡¤ò»ØÄꤷ¡¢cjpeg/djpeg ¤Ç¤Ï -dct ¥ª¥×¥·¥ç¥ó¤Ë
+    Âбþ¤·¤Þ¤¹¡£¤³¤ÎÊÑ¿ô¤ÎÃͤϥǥե©¥ë¥È¤Ç¤Ï JDCT_ISLOW ¤Ç¡¢ÆÃ¤ËÍýͳ¤Î
+    ¤Ê¤¤¸Â¤ê¤³¤Î¥Ç¥Õ¥©¥ë¥È¤Î¾õÂ֤ǻÈÍѤ¹¤ë¤³¤È¤ò¶¯¤¯¿ä¾©¤·¤Þ¤¹¡£°Ê²¼¤Î
+    ¥Þ¥¯¥í¤ò #undef ¤Ë¤¹¤ë¤³¤È¤Ç¡¢JDCT_ISLOW ¤Î¾õÂ֤ǤϷ褷¤Æ»ÈÍѤµ¤ì¤Ê¤¤
+    ¥³¡¼¥É¤ò½ü³°¤Ç¤­¤Þ¤¹¡£
+
+    #define DCT_IFAST_SUPPORTED  ->  #undef DCT_IFAST_SUPPORTED
+    #define DCT_FLOAT_SUPPORTED  ->  #undef DCT_FLOAT_SUPPORTED
+
+    ¤³¤ì¤À¤±¤Ç¤â¤«¤Ê¤ê¤Î¥³¡¼¥É¥µ¥¤¥º¤¬ºï¸º¤Ç¤­¤Þ¤¹¡£ÆÃ¤Ë DCT_FLOAT_SUPPORTED
+    ¤ò #undef ¤Ë¤¹¤ë¤È¡¢3DNow! ¤È SSE ¤Î¥µ¥Ý¡¼¥È¤â¼«Æ°Åª¤Ë̵¸ú¤Ë¤Ê¤ê¤Þ¤¹¡£
+
+  ¡ûŸ³«½èÍý¤Ç cinfo.do_fancy_upsampling ¤ÎÃͤòÊѹ¹¤·¤Æ¤¤¤Ê¤¤¾ì¹ç
+
+    ¤³¤ÎÊÑ¿ô¤Ï djpeg ¤Ç¤Ï -nosmooth ¥ª¥×¥·¥ç¥ó¤ËÁêÅö¤·¡¢-nosmooth ¤ò»ØÄê
+    ¤¹¤ë¤È FALSE ¤ËÀßÄꤵ¤ì¤Þ¤¹¡£¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï TRUE ¤Ç¡¢¤³¤ì¤âÆÃ¤ËÍýͳ¤Î
+    ¤Ê¤¤¸Â¤ê¤³¤Î¥Ç¥Õ¥©¥ë¥È¤Î¾õÂ֤ǻÈÍѤ¹¤ë¤³¤È¤ò¶¯¤¯¿ä¾©¤·¤Þ¤¹¡£°Ê²¼¤Î
+    ¥Þ¥¯¥í¤ò #undef ¤Ë¤¹¤ë¤³¤È¤Ç¡¢TRUE ¤Î¾õÂ֤ǤϷ褷¤Æ»ÈÍѤµ¤ì¤Ê¤¤¥³¡¼¥É¤ò
+    ½ü³°¤Ç¤­¤Þ¤¹¡£
+
+    #define UPSAMPLE_MERGING_SUPPORTED  ->  #undef UPSAMPLE_MERGING_SUPPORTED
+
+  ¡ûŸ³«½èÍý¤Ç cinfo.scale_num, cinfo.scale_denom ¤ÎÃͤòÊѹ¹¤·¤Æ¤¤¤Ê¤¤¾ì¹ç
+
+    ¤³¤ì¤ÏÍפ¹¤ë¤Ë¡ÖJPEG½Ì¾®Å¸³«¡×¤Îµ¡Ç½¤Ç¡¢djpeg ¤Ç¤Ï -scale M/N ¥ª¥×
+    ¥·¥ç¥ó¤ËÁêÅö¤·¤Þ¤¹¡£¥µ¥à¥Í¥¤¥ëºîÀ®¤Ê¤É¤Î¾ì¹ç¤ËÍøÍѤµ¤ì¤ë¤³¤È¤¬Â¿¤¤
+    µ¡Ç½¤Ç¤¹¤¬¡¢¤³¤ì¤ò¤Þ¤Ã¤¿¤¯»ÈÍѤ·¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ï¡¢°Ê²¼¤Î¥Þ¥¯¥í¤ò #undef
+    ¤Ë¤¹¤ë¤³¤È¤Ç¡¢¥³¡¼¥ÉÎ̤òºï¸º¤Ç¤­¤Þ¤¹¡£
+
+    #define IDCT_SCALING_SUPPORTED  ->  #undef IDCT_SCALING_SUPPORTED
+
+  Ãí°ÕÅÀ¤È¤·¤Æ¡¢¤³¤ì¤é¤ÎÀßÄêÊÑ¿ô¤¬¤É¤Î¤è¤¦¤Ê¾õÂ֤ǻȤï¤ì¤ë¤«Í½Â¬¤Ç¤­¤Ê¤¤
+  ¾ì¹ç¡¢¤¿¤È¤¨¤Ð¡¢¥·¥¹¥Æ¥à¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë¶¦Í­¥é¥¤¥Ö¥é¥ê¤òºî¤ë¾ì¹ç¤Ê¤É
+  ¤Ï¡¢¤³¤¦¤¤¤Ã¤¿¥³¡¼¥Éºï¸º¤Ï¹Ô¤Ê¤¦¤Ù¤­¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£¥³¡¼¥Éºï¸º¤ò¹Ô¤Ê¤¦
+  ¤Î¤Ï¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤Î»È¤ï¤ìÊý¤¬¤è¤¯¤ï¤«¤Ã¤Æ¤¤¤ëÆÃÄê¤Î¥¢¥×¥ê¥±¡¼¥·¥ç¥ó
+  ¤Ë¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Î¤ß¤Ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+
+■特定の SIMD 命令を使用しないようにするには
+
+  ¤³¤ì¤Ï jconfig.h ¤ÎÃæÄø¤Ë¤¢¤ë¡¢#undef JSIMD_***_NOT_SUPPORTED ¤È¤¤¤¦
+  ¥Þ¥¯¥í¤ò #define ¤Ë¤¹¤ë¤³¤È¤Ç¼Â¸½¤Ç¤­¤Þ¤¹¡£configure ¥¹¥¯¥ê¥×¥È¤Ç
+  --disable-mmx ¤Ê¤É¤Î¥ª¥×¥·¥ç¥ó¤ò»ØÄꤷ¤¿¾ì¹ç¤Ï¡¢¤³¤Î¥Þ¥¯¥í¤Ï¼«Æ°Åª¤Ë
+  #define ¤µ¤ì¤Þ¤¹¡£
+
+  3DNow! ¤È SSE ¤Ï¸µ¡¹¡¢ÉâÆ°¾®¿ôÅÀDCT¤Ë¤·¤«ÍøÍѤµ¤ì¤Æ¤¤¤Þ¤»¤ó¤Î¤Ç¡¢¾å½Ò¤Î
+  DCT_FLOAT_SUPPORTED ¤ò #undef ¤Ë¤·¤¿¤À¤±¤Ç¤Þ¤È¤á¤ÆÌµ¸ú¤Ë¤µ¤ì¤Þ¤¹¡£
+  MMX ¤È SSE2 ¤Ï¡¢°µ½ÌŸ³«½èÍý¤Î³Æ½ê¤ËÍøÍѤµ¤ì¤Æ¤¤¤Æ¡¢¹â®²½¤Ø¤Î¹×¸¥ÅÙ¤¬
+  ¹â¤¤¤Î¤Ç¡¢Í­¸ú¤Ë¤·¤Æ¤ª¤¯¤³¤È¤ò¤ªÁ¦¤á¤·¤Þ¤¹¤¬¡¢ÁȤ߹þ¤ßÍÑÅӤʤɡ¢¥³¡¼¥É
+  ¤òÁö¤é¤»¤ë¥×¥í¥»¥Ã¥µ¤Î¼ïÎब¤ï¤«¤Ã¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¡ÖÄ̾ïÍøÍѤµ¤ì¤Ê¤¤Êý¡×
+  ¤Î¥µ¥Ý¡¼¥È¤ò³°¤¹¤³¤È¤Ç¡¢¥³¡¼¥É¥µ¥¤¥º¤Îºï¸º¤¬¤Ç¤­¤Þ¤¹¡£
+
+
+
+[EOF]
diff --git a/simd_internal.ja.txt b/simd_internal.ja.txt
new file mode 100644 (file)
index 0000000..d234901
--- /dev/null
@@ -0,0 +1,293 @@
+Independent JPEG Group's JPEG software release 6b
+  with x86 SIMD extension for IJG JPEG library version 1.02
+    == INTERNAL ==
+-----------------------------------------------------------
+
+■このファイルは
+
+  ¤³¤Î¥Õ¥¡¥¤¥ë¤Ç¤Ï¡¢SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¡¢SIMD ³ÈÄ¥Éôʬ¤Î¾ÜºÙ¤ò
+  ²òÀ⤷¤Þ¤¹¡£SIMD ³ÈÄ¥Éôʬ¤ËÂФ·¤Æ²¿¤é¤«¤Î¼ê¤ò²Ã¤¨¤¿¤¤¾ì¹ç¤ä¡¢É¸½à¤Ç¤Ï
+  Âбþ¤·¤Æ¤¤¤Ê¤¤¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤ËÂбþ¤µ¤»¤¿¤¤¾ì¹ç¤Ê¤É¤Ï¡¢¤³¤³¤òÆÉ¤ó¤Ç
+  ¤¯¤À¤µ¤¤¡£
+
+    ■ファイルフォーマット／呼び出し規約(ABI)の指定
+    ■OS の SIMD サポートチェック (jsimdgcc.c / jsimddjg.asm / jsimdw32.asm)
+    ■アセンブリ言語用設定ファイル jsimdcfg.inc の作成 (makecfg.c)
+    ■SIMD 命令の実行時の選択／SIMD 動作モード情報
+    ■そのほかの設定項目マクロ
+      ○ RGB_RED / RGB_GREEN / RGB_BLUE / RGB_PIXELSIZE
+      ○ RGBX_FILLER_0XFF
+      ○ JFDCT_INT_QUANTIZE_WITH_DIVISION
+      ○ UPSAMPLE_H1V2_SUPPORTED
+
+
+■ファイルフォーマット／呼び出し規約(ABI)の指定
+
+  ¥¢¥»¥ó¥Ö¥ê¸À¸ì¤Ç½ñ¤«¤ì¤¿¥³¡¼¥É¤ò¡¢£Ã¸À¸ì¤Ê¤É¤Î¹âµé¸À¸ì¤Ç½ñ¤«¤ì¤¿¥³¡¼¥É
+  ¤È¥ê¥ó¥¯¤¹¤ë¤Ë¤Ï¡¢¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò°ìÃפµ¤»¤ë¤³¤È¡¢
+  ¤ª¤è¤Ó¡¢¸Æ¤Ó½Ð¤·µ¬Ìó¤Ê¤É¤Î¥Ð¥¤¥Ê¥ê¥³¡¼¥É¤Îµ¬Ìó(ABI)¤ò°ìÃפµ¤»¤ë¤³¤È¤¬
+  É¬ÍפǤ¹¡£¤³¤Î¥½¥Õ¥È¤Ç¤Ï¡¢¤³¤ì¤é¤Î»ØÄê¤ò¡¢¥¢¥»¥ó¥Ö¥é nasm ¤ËÍ¿¤¨¤ë¥ª¥×
+  ¥·¥ç¥ó¤Ç»ØÄꤷ¤Æ¤¤¤Þ¤¹¡£
+
+  ¡¦nasm -fwin32 -DWIN32 ...
+
+    Win32 ¤Î coff ¥Õ¥©¡¼¥Þ¥Ã¥È¡£Microsoft Visual C++ ¤ä MinGW¡¦CygWin
+    ¤Ê¤É¡¢Win32 ¥³¥ó¥Ñ¥¤¥é¤ÎÂçÉôʬ¤¬³ºÅö¡£
+
+  ¡¦nasm -fobj -DOBJ32 ...
+
+    Win32 ¤Î obj ¥Õ¥©¡¼¥Þ¥Ã¥È¡£¸µ¡¹¤Ï MS-DOS ¤Ç»È¤ï¤ì¤Æ¤¤¤¿ obj ·Á¼°
+    (MSOMF)¤ò 32bit ¤Ë³ÈÄ¥¤·¤¿¤â¤Î¡£Borland C++ Complier (Win32) ¤Ê¤É¡£
+
+  ¡¦nasm -felf -DELF ...
+
+    ³Æ¼ï¤Î UNIX ¤Ç¹­¤¯ºÎÍѤµ¤ì¤Æ¤¤¤ë ELF ¥Õ¥©¡¼¥Þ¥Ã¥È¡£linux ¤ä xBSD
+    ¥Õ¥¡¥ß¥ê¤Ê¤É¡¢¸½ºß¤Î UNIX ¤ÎÂçÉôʬ¤¬³ºÅö¡£
+
+  ¡¦nasm -faoutb -DAOUT ...
+
+    °ÊÁ°¤Î xBSD ¥Õ¥¡¥ß¥ê¤Ç»È¤ï¤ì¤Æ¤¤¤¿ a.out ¥Õ¥©¡¼¥Þ¥Ã¥È¡£
+
+  ¡¦nasm -fmacho -DMACHO ...
+
+    Darwin (MacOS X) ¤Ê¤É¤ÇºÎÍѤµ¤ì¤Æ¤¤¤ë Mach-O ¥Õ¥©¡¼¥Þ¥Ã¥È¡£
+    Ãí¡Ë-fmacho ¥ª¥×¥·¥ç¥ó¤Ï nasm 0.98.40 °Ê¹ß¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤Þ¤¹¡£
+
+  ¡¦nasm -fcoff -DDJGPP ...
+
+    MS-DOS ¤Î DJGPP ¥³¥ó¥Ñ¥¤¥é¤Ç»È¤ï¤ì¤ë coff ¥Õ¥©¡¼¥Þ¥Ã¥È¡£
+
+  ¤³¤Î¤¦¤Á¡¢-f ¥ª¥×¥·¥ç¥ó¤Ï nasm ¤¬²ò¼á¤¹¤ë¥Õ¥¡¥¤¥ë¥Õ¥©¡¼¥Þ¥Ã¥È¤Î»ØÄê»Ò¤Ç¡¢
+  -D ¥ª¥×¥·¥ç¥ó(¥Þ¥¯¥í¤ÎÄêµÁ)¤Ï jsimdext.inc ¤ÎÃæ¤Ç²ò¼á¤µ¤ì¤ë¥Ð¥¤¥Ê¥êµ¬Ìó
+  (ABI)¤Î»ØÄê»Ò¤Ç¤¹¡£jsimdext.inc ¤Ç¤Ï¡¢-D ¥ª¥×¥·¥ç¥ó¤Ç¤Î¥Þ¥¯¥íÄêµÁ¤Ë½¾¤Ã¤Æ¡¢
+  ¥»¥°¥á¥ó¥È(¥»¥¯¥·¥ç¥ó)¤ÎÄêµÁ¤ä³°Éô̾̾Á°Áõ¾þ¤ÎÄêµÁ¤ò¹Ô¤Ê¤Ã¤Æ¤¤¤Þ¤¹¡£
+  ¾Ü¤·¤¯¤Ï jsimdext.inc ¤ò¤´Í÷¤¯¤À¤µ¤¤¡£
+
+  ELF ·Á¼° ¤ª¤è¤Ó a.out ·Á¼° ¤Î¾ì¹ç¡¢-DPIC ¤òÄɲ䷤ƻØÄꤹ¤ë¤È¥³¡¼¥É¤¬
+  Position Independent Code (°ÌÃÖÆÈΩ¥³¡¼¥É) ¤Ë¤Ê¤ê¤Þ¤¹¡£-DPIC ¤Ï
+  jsimdext.inc ¤ÎÃæ¤Ç²ò¼á¤µ¤ì¡¢¥³¡¼¥É¤ò PIC ¤Ë¤¹¤ë¤¿¤á¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤Þ¤¹¡£
+  Mach-O ·Á¼°¤Î¾ì¹ç¤Ï¡¢¥³¡¼¥É¤Ï¾ï¤Ë PIC ¤Ç¤¢¤ëɬÍפ¬¤¢¤ë¤¿¤á¡¢-DPIC ¤ò
+  »ØÄꤷ¤Ê¤¯¤Æ¤â¾ï¤Ë PIC ·Á¼°¤Î¥³¡¼¥É¤òÀ¸À®¤·¤Þ¤¹¡£
+
+  ¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë±þ¤¸¤Æ¡¢¤³¤ì¤é¤ÎÃæ¤«¤éŬÀڤʤâ¤Î¤òÁªÂò¤¹¤ëɬÍפ¬¤¢¤ê
+  ¤Þ¤¹¡£Æ±º­¤Î makefile ¤Ç¤Ï¡¢¤¢¤é¤«¤¸¤áŬÀڤʤâ¤Î¤¬»ØÄꤵ¤ì¤Æ¤¤¤Þ¤¹¡£
+  configure ¥¹¥¯¥ê¥×¥È¤Ç¤Ï¡¢config.guess ¤¬½ÐÎϤ¹¤ë¥Û¥¹¥È¾ðÊó¤ò¸µ¤ËÁªÂò
+  ¤·¤Æ¤¤¤Þ¤¹¡£
+
+
+■OS の SIMD サポートチェック (jsimdgcc.c / jsimddjg.asm / jsimdw32.asm)
+
+  SIMD Ì¿Îá¤ò¼Â¹Ô¤¹¤ë¤Ë¤Ï¡¢»öÁ°¤Î CPU ¤Î¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤À¤±¤Ç¤Ï¤Ê¤¯¡¢
+  OS ¤Î¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤âɬÍפǤ¹¡£ÆÃ¤Ë SSE/SSE2 ¤Ë¤Ä¤¤¤Æ¤Ï¡¢OS Â¦¤Ç
+  SSE/SSE2 Ì¿Îá¤ò¼Â¹Ô¤Ç¤­¤ë¤è¤¦¤Ë»öÁ°¤ËCPU¤òÀßÄꤹ¤ëɬÍפ¬¤¢¤ê¡¢¤½¤ì¤ò
+  ¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤ OS ¤Ç¤Ï¡¢OS ¤¬¥·¥ó¥°¥ë¥¿¥¹¥¯¤«¥Þ¥ë¥Á¥¿¥¹¥¯¤«¤Ë´Ø¤ï¤é¤º¡¢
+  SSE/SSE2 ¤Ï¼Â¹Ô¤Ç¤­¤Þ¤»¤ó¡£¤µ¤é¤Ë¡¢¤¢¤Þ¤êÃΤé¤ì¤Æ¤¤¤Þ¤»¤ó¤¬¡¢CPU ¤Ë
+  Æâ¢¤µ¤ì¤¿ FPU (¿ôÃͱ黻¥×¥í¥»¥Ã¥µ) ¤ò»ÈÍѤ·¤Ê¤¤(¥¨¥ß¥å¥ì¡¼¥È¤¹¤ë)ÀßÄê
+  ¤Ë¤Ê¤Ã¤Æ¤¤¤ë¤È¡¢MMX ¤ä 3DNow! ¤â´Þ¤á¤¹¤Ù¤Æ¤Î SIMD Ì¿Îá¤ÏÁ´¤¯¼Â¹Ô¤Ç¤­
+  ¤Þ¤»¤ó¡£Íפ¹¤ë¤Ë¡¢SIMD Ì¿Îá¤ò¼Â¹Ô¤Ç¤­¤ë¤«¤É¤¦¤«¤òÄ´¤Ù¤ë¤Ë¤Ï¡¢CPUID ¤Î
+  ¥Õ¥é¥°¤òÄ´¤Ù¤ë¤À¤±¤Ç¤ÏÉÔ½½Ê¬¤È¤¤¤¦¤³¤È¤Ç¤¹¡£
+
+  SIMD Ì¿Îá¤Î OS ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤È¤¤¤Ã¤Æ¤â¡¢Êݸî¥â¡¼¥É¤Çư¤¤¤Æ¤¤¤ë
+  ¥×¥í¥°¥é¥à¤Î¾ì¹ç¡¢CPU ¤ÎÀßÄê¥Õ¥é¥°¤Î¾õÂÖ¤òľÀܥ桼¥¶¡¦¥×¥í¥°¥é¥à¤«¤é
+  ÆÉ¤ß¤È¤ë¤³¤È¤¬¤Ç¤­¤Ê¤¤¤¿¤á¡¢SIMD Ì¿Îá¤ò»î¤·¤Ë¼Â¹Ô¤·¤Æ¤ß¤ÆÌµ¸úÌ¿ÎáÎã³°¤¬
+  È¯À¸¤¹¤ë¤«¤É¤¦¤«¤ò³Î¤«¤á¤ë¤È¤¤¤¦¡¢°Ü¿¢À­¤Î°­¤¤´ÖÀÜŪ¤ÊÊýË¡¤òºÎ¤é¤¶¤ë¤ò
+  ÆÀ¤Ê¤¤¤Î¤¬¸½¾õ¤Ç¤¹¡£
+
+  ¤³¤Î SIMD Ì¿Îá¤Î OS ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤ò¤ä¤Ã¤Æ¤¤¤ë¤Î¤¬¡¢jsimdgcc.c /
+  jsimddjg.asm / jsimdw32.asm ¤Î£³¤Ä¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Ç¤¹¡£¤½¤ì¤¾¤ì¡¢
+  UNIX/gccÍÑ¡¢DJGPPÍÑ¡¢Win32ÍѤǤ¹¡£jsimdgcc.c ¤Ç¤Ï¡¢Îã³°¤ÎȯÀ¸¤ò
+  signal() ´Ø¿ô¤Î¥·¥°¥Ê¥ë¥Ï¥ó¥É¥é¤ÇÊá¤Þ¤¨¤Æ¤¤¤Þ¤¹¡£¥³¡¼¥É¤Î°ìÉô¤Ë gcc ¤Î
+  ¥¤¥ó¥é¥¤¥ó¥¢¥»¥ó¥Ö¥é¤ò»È¤Ã¤Æ¤¤¤ë¤¿¤á¡¢gcc ÀìÍѤǤ¹¡£gcc °Ê³°¤Ç¤â
+  ¥³¥ó¥Ñ¥¤¥ë¤Ï¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤Î¾ì¹ç¤Ï SIMD Ì¿Îá¤Î¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤Ï
+  ¹Ô¤Ê¤ï¤ì¤Þ¤»¤ó¡£jsimddjg.asm ¤Ï DPMI ¤ÎÎã³°½èÍýµ¡¹½¤òľÀÜÍøÍѤ·¤¿
+  ÊýË¡¤Ç¡¢jsimdw32.asm ¤Ï Win32 ¤ÎÎã³°½èÍýµ¡¹½¤òľÀÜÍøÍѤ·¤¿ÊýË¡¤Ç¤¹¡£
+
+  ¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë±þ¤¸¤Æ¡¢¤³¤Î£³¼ïÎà¤ÎÃæ¤«¤éŬÀڤʤâ¤Î¤òÁªÂò¤¹¤ëɬÍפ¬
+  ¤¢¤ê¤Þ¤¹¡£Æ±º­¤Î makefile ¤Ç¤Ï¡¢¤¢¤é¤«¤¸¤áŬÀڤʤâ¤Î¤¬»ØÄꤵ¤ì¤Æ¤¤¤Þ¤¹¡£
+  configure ¥¹¥¯¥ê¥×¥È¤Ç¤Ï¡¢config.guess ¤¬½ÐÎϤ¹¤ë¥Û¥¹¥È¾ðÊó¤ò¸µ¤ËÁªÂò
+  ¤·¤Æ¤¤¤Þ¤¹¡£
+
+  ¤³¤Î£³¼ïÎà¤Î¤É¤ì¤È¤âŬ¹ç¤·¤Ê¤¤¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Î¾ì¹ç¤Ï¡¢¿·¤¿¤Ê¥Á¥§¥Ã¥¯
+  ´Ø¿ô¤ò½ñ¤¯É¬Íפ¬¤¢¤ê¤Þ¤¹¡£¤Ç¤¹¤¬¡¢¤½¤Î OS ¤¬Á´¤Æ¤Î SIMD Ì¿Îá¤ò¥µ¥Ý¡¼¥È
+  ¤·¤Æ¤¤¤ë OS ¤Ç¤¢¤ë¤³¤È¤¬¤ï¤«¤Ã¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤³¤Î OS ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤Ï
+  ¾Êά¤¹¤ë¤³¤È¤â²Äǽ¤Ç¤¹¡£°Ê²¼¤Î¤è¤¦¤Ê¶õ¤Î´Ø¿ô¤Î¤ß¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤ò
+  ºî¤Ã¤Æ¾åµ­¤Î£³¤Ä¤Î¥Õ¥¡¥¤¥ë¤ÎÂå¤ï¤ê¤Ë»ÈÍѤ¹¤ë¤«¡¢¤â¤·¤¯¤Ï jcomapi.c ¤ò
+  ²þÊѤ·¤Æ jpeg_simd_os_support ¤Î¸Æ¤Ó½Ð¤·¤ò¥Ð¥¤¥Ñ¥¹¤¹¤ë¤è¤¦¤Ë¤¹¤ì¤Ð£Ï£Ë
+  ¤Ç¤¹¡£
+
+    GLOBAL(unsigned int)
+    jpeg_simd_os_support (unsigned int simd)
+    {
+      return simd;
+    }
+
+
+■アセンブリ言語用設定ファイル jsimdcfg.inc の作成 (makecfg.c)
+
+  Åö¥½¥Õ¥È¤Î¾ì¹ç¡¢¥¢¥»¥ó¥Ö¥ê¸À¸ì¤Î¥½¡¼¥¹¥³¡¼¥ÉÃæ¤«¤é£Ã¸À¸ì¤Î¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë
+  ¤Ë¤¢¤ë¾ðÊ󡢤¿¤È¤¨¤Ð¡¢¥×¥ê¥×¥í¥»¥Ã¥µ¥Þ¥¯¥í¤ÎÃͤ乽¤ÂΤÎÃæ¤Ë¤¢¤ëÊÑ¿ô¤Î
+  ¥ª¥Õ¥»¥Ã¥È¤Ê¤É¡¢¤òÃΤëɬÍפ¬¤¢¤ê¤Þ¤¹¡£makecfg.c ¤Ï¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤Î
+  ¥³¥ó¥Ñ¥¤¥ë¤ËÀèΩ¤Ã¤Æ¥³¥ó¥Ñ¥¤¥ë¡¦¥ê¥ó¥¯¡¦¼Â¹Ô¤µ¤ì¡¢¥¢¥»¥ó¥Ö¥ê¸À¸ì¦¤Î
+  ¥½¡¼¥¹¥³¡¼¥É¤ÇɬÍפȤʤë¾ðÊó¤ò¥¢¥»¥ó¥Ö¥ê¸À¸ìÍÑÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc
+  ¤È¤·¤Æ½ÐÎϤ¹¤ëƯ¤­¤ò¤·¤Æ¤¤¤Þ¤¹¡£
+
+  Ãí°ÕÅÀ¤È¤·¤Æ¡¢makecfg.c ¤Ï JPEG ¥é¥¤¥Ö¥é¥ê¤Î¥½¡¼¥¹¥³¡¼¥É¤ÈƱ¤¸¥³¥ó¥Ñ¥¤¥ë
+  ¥ª¥×¥·¥ç¥ó¤Ç¥³¥ó¥Ñ¥¤¥ë¤µ¤ì¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£ÆÃ¤Ë¡¢¹½Â¤ÂΤβò¼á(¥µ¥¤¥º
+  ¤Ê¤É)¤¬ JPEG ¥é¥¤¥Ö¥é¥êËÜÂΤΤâ¤Î¤È°Û¤Ê¤Ã¤Æ¤·¤Þ¤¦¤È¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤¬
+  ¥¯¥é¥Ã¥·¥å¤·¤Þ¤¹¡£
+
+
+■SIMD 命令の実行時の選択／SIMD 動作モード情報
+
+  ¤³¤Î SIMD ³ÈÄ¥ÈÇ JPEG ¥é¥¤¥Ö¥é¥ê¤Ç¤Ï¡¢¥×¥í¥°¥é¥à¤Î¼Â¹Ô»þ¤ËÆÃÄê¤Î SIMD
+  Ì¿Îá¤ò»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë¤·¤¿¤ê¡¢³Æ½èÍýÃʳ¬¤Ç¤É¤Î SIMD Ì¿Îá¤ò»ÈÍѤ·¤ÆÆ°ºî
+  ¤¹¤ë¤Î¤«¤òÇİ®¤Ç¤­¤ë»ÅÁȤߤ¬ÍѰդµ¤ì¤Æ¤¤¤Þ¤¹¡£
+
+  jpeg_simd_mask() ¤ò»È¤¦¤È¡¢ÆÃÄê¤Î SIMD Ì¿Îá¤ò¼Â¹Ô»þ¤Ë»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë
+  ¤Ç¤­¤Þ¤¹¡£
+
+    GLOBAL(unsigned)
+    jpeg_simd_mask (j_common_ptr cinfo, unsigned remove, unsigned add);
+
+  ¤³¤Î´Ø¿ô¤Ï¡¢³Æ SIMD Ì¿Î᥻¥Ã¥È¤ËÂбþ¤¹¤ë¥Þ¥¹¥¯¥Ó¥Ã¥È¤òÀßÄꡦÊѹ¹¤·¤Þ¤¹¡£
+  remove, add ¤½¤·¤ÆÌá¤êÃͤϡ¢³Æ SIMD Ì¿Îá¤ËÂбþ¤¹¤ë¥Ó¥Ã¥ÈÃÍ (JSIMD_MMX,
+  JSIMD_3DNOW, JSIMD_SSE, JSIMD_SSE2) ¤ò OR ±é»»¤ÇÁȤ߹ç¤ï¤»¤¿¤â¤Î¤Ç¤¹¡£
+
+  "¥Þ¥¹¥¯¥Ó¥Ã¥È" ¤Ï¡¢¹½Â¤ÂΠcinfo ¤ËÊÝ»ý¤µ¤ì¤ëÃͤǡ¢¤½¤Î½é´üÃͤϠ0 ¤Ç¤¹¡£
+  ¤½¤·¤Æ¤³¤Î´Ø¿ô¤Ï¡¢¤³¤Î¥Þ¥¹¥¯¥Ó¥Ã¥È¤ò°Ê²¼¤Î¤è¤¦¤Ë¹¹¿·(Áàºî)¤·¤Þ¤¹¡£
+
+    (¿·¤·¤¤¥Þ¥¹¥¯¥Ó¥Ã¥È) = ((¸Å¤¤¥Þ¥¹¥¯¥Ó¥Ã¥È) & ~remove) | add;
+
+  ¤½¤·¤Æ¡¢¤³¤Î¥Þ¥¹¥¯¥Ó¥Ã¥È¤¬ 1 ¤Ë¤µ¤ì¤¿ SIMD Ì¿Î᥻¥Ã¥È¤Ï¡¢¤¿¤È¤¨ CPU/OS
+  ¤ÇÂбþ¤·¤Æ¤¤¤Æ¤â»ÈÍѤµ¤ì¤Þ¤»¤ó¡£¤³¤Î´Ø¿ô¤Ï¡¢¤³¤Î´Ø¿ô¤ò¸Æ¤ÖľÁ°¤Þ¤ÇÀßÄê
+  ¤µ¤ì¤Æ¤¤¤¿¥Þ¥¹¥¯¥Ó¥Ã¥È¤òÊÖ¤·¤Þ¤¹¡£¤Ê¤Î¤Ç¡¢remove, add ¶¦¤Ë 0 ¤òÍ¿¤¨¤Æ
+  ´Ø¿ô¤ò¸Æ¤Ù¤Ð¡¢¸½ºßÀßÄꤵ¤ì¤Æ¤¤¤ë¥Þ¥¹¥¯¥Ó¥Ã¥È¤ò¼èÆÀ¤Ç¤­¤Þ¤¹¡£¤³¤Î´Ø¿ô¤Î
+  »ÈÍÑÎã¤Ï¡¢cjpeg.c, djpeg.c, jcomapi.c ¤Ë¤¢¤ê¤Þ¤¹¡£
+
+  ¤³¤Î¥Þ¥¹¥¯¥Ó¥Ã¥È¤ÎÃͤϡ¢¼ÂºÝ¤Ë¤Ï¹½Â¤ÂΠcinfo ¤Î output_gamma ¤â¤·¤¯¤Ï
+  input_gamma ÊÑ¿ô¤Î²¼°Ì¥Ó¥Ã¥È¤ÎÊݸ¤µ¤ì¤Æ¤¤¤Þ¤¹(¾Ü¤·¤¯¤Ï jcomapi.c ¤ò
+  »²¾È)¡£¤³¤ì¤Ï¡¢¹½Â¤ÂΠcinfo ¤Ë¿·¤¿¤ÊÊÑ¿ô¤òÄɲ䷤Ƥ·¤Þ¤¦¤È¥Ð¥¤¥Ê¥ê¸ß´¹
+  ¤¬Êø¤ì¤Æ¤·¤Þ¤¦¤¿¤á¤Ç¡¢¸½¾õ¤Ç¤Ï̤»ÈÍѤȻפï¤ì¤ë¾åµ­¤ÎÊÑ¿ô¤ò¡Ö´Ö¼Ú¤ê¡×
+  ¤·¤Æ¤¤¤Þ¤¹¡£
+
+  ¤Þ¤¿¡¢°Ê²¼¤Î´Ø¿ô·²¤ò»È¤¦¤È¡¢¥é¥¤¥Ö¥é¥êÆâÉô¤Î³Æ½èÍýÃʳ¬¤Ç¤É¤Î SIMD Ì¿Îá
+  ¤ò»ÈÍѤ·¤ÆÆ°ºî¤¹¤ë¤Î¤«¤òÇİ®¤Ç¤­¤Þ¤¹¡£
+
+    jpeg_simd_color_converter();   -> ¿§¶õ´ÖÊÑ´¹(RGB->YCbCr)
+    jpeg_simd_downsampler();       -> ¥À¥¦¥ó¥µ¥ó¥×¥ê¥ó¥°
+    jpeg_simd_forward_dct();       -> DCT½çÊÑ´¹
+    jpeg_simd_color_deconverter(); -> ¿§¶õ´ÖÊÑ´¹(YCbCr->RGB)
+    jpeg_simd_upsampler();         -> ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°
+    jpeg_simd_inverse_dct();       -> DCTµÕÊÑ´¹
+
+  ÊÖ¤¹Ãͤϡ¢ÉâÆ°¾®¿ôÅÀDCT½çÊÑ´¹/µÕÊÑ´¹ ¤Î¾ì¹ç¤Ï JSIMD_3DNOW ¤« JSIMD_SSE¡¢
+  ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ï JSIMD_MMX ¤« JSIMD_SSE2 ¤òÊÖ¤·¤Þ¤¹¡£¤Þ¤¿¡¢0 ¤¬Ê֤äÆ
+  ¤­¤¿¾ì¹ç¤Ï SIMD Ì¿Î᥻¥Ã¥È¤Ï»È¤ï¤ì¤º¡¢½¾Íè¤Î¥ë¡¼¥Á¥ó¤¬»È¤ï¤ì¤ë¤³¤È¤ò
+  °ÕÌ£¤·¤Þ¤¹¡£
+
+  ¤³¤ì¤é¤Î´Ø¿ô¤Î¾Ü¤·¤¤»È¤¤Êý¤Ë¤Ä¤¤¤Æ¤Ï¡¢cjpeg.c, djpeg.c (»ÈÍÑÎã) ¤ò¤´Í÷
+  ¤¯¤À¤µ¤¤¡£
+
+  ¤Ê¤ª¡¢¤³¤ì¤é¤Î SIMD ¥Þ¥¹¥¯´Ø¿ô¡¿SIMD ¥â¡¼¥É¾ðÊó´Ø¿ô ¤¬É¬Íפʤ¤¾ì¹ç¤Ï¡¢
+  °Ê²¼¤Î¥Þ¥¯¥í¤ò jconfig.h ¤Ê¤É¤Ë´Þ¤á¤ë¤³¤È¤Ç¡¢¶Ï¤«¤Ç¤¹¤¬¥³¡¼¥É¥µ¥¤¥º¤¬
+  ÀáÌó¤Ç¤­¤Þ¤¹¡£
+
+    #define JSIMD_MASKFUNC_NOT_SUPPORTED
+    #define JSIMD_MODEINFO_NOT_SUPPORTED
+
+
+■そのほかの設定項目マクロ
+
+  ○ RGB_RED / RGB_GREEN / RGB_BLUE / RGB_PIXELSIZE
+
+    ¤³¤ì¤Ï¡¢jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¡¢¼è¤ê°·¤¦ RGB ·Á¼°²èÁü
+    ¥Ç¡¼¥¿¤Î RGB ¤Îʤӽç¤ä¥Ô¥¯¥»¥ë¥µ¥¤¥º¤òÀßÄꤷ¤Þ¤¹¡£¤³¤Î SIMD ³ÈÄ¥ÈÇ
+    ¤Ç¤âÊѹ¹¤Ç¤­¤ë¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¤¬¡¢RGB_PIXELSIZE ¤¬ 3 ¤« 4 ¤Î¾ì¹ç¤Î¤ß¡¢
+    SIMD ÈǤο§¶õ´ÖÊÑ´¹¥ë¡¼¥Á¥ó¤¬Í­¸ú¤Ë¤Ê¤ê¤Þ¤¹¡£¤½¤ì°Ê³°¤ÎÃͤˤ·¤¿¾ì¹ç¤Ï¡¢
+    SIMD ÈǤο§¶õ´ÖÊÑ´¹¥ë¡¼¥Á¥ó¤Ï¼«Æ°Åª¤Ë̵¸ú²½¤µ¤ì¤Æ¡¢½¾Íè¤Î¿§¶õ´ÖÊÑ´¹
+    ¥ë¡¼¥Á¥ó¤¬»È¤ï¤ì¤Þ¤¹(¤ä¤äÄ㮤ˤʤê¤Þ¤¹)¡£
+
+    ¤³¤ì¤é¤ÎÃͤòÊѹ¹¤¹¤ë¤³¤È¤Ç¡¢½ÐÎϤò 32bit/pixel ·Á¼°¤Ë¤·¤¿¤ê¡¢BMP ·Á¼°¤Ë
+    ¹ç¤ï¤»¤Æ¥Ô¥¯¥»¥ë¤ò BGR ½ç¤Ë¤·¤¿¤ê¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ê¤Þ¤¹¡£¤Ê¤ª¡¢
+    ¤³¤ì¤é¤ÎÃͤòÌ·½â¤¹¤ëÃͤˠ#define ¤·¤¿¾ì¹ç¤Ï¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Ê¤¤¤è¤¦¤Ë¤·¤Æ
+    ¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢Ãí°Õ¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+  ○ RGBX_FILLER_0XFF
+
+    ¤³¤ì¤â jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¤¹¡£¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï #undef
+    ¤Ë¤Ê¤Ã¤Æ¤¤¤Þ¤¹¡£¾å¤Î RGB_PIXELSIZE ¤ò 4 ¤Ë¤·¤¿¾ì¹ç¡¢£±¤Ä¤Î¥Ô¥¯¥»¥ë
+    ¥Ç¡¼¥¿Ãæ¤Ë(RGB¤Î£³¥Ð¥¤¥È¤Î¾¤Ë);·×¤Ê£±¥Ð¥¤¥È¤¬Â¸ºß¤¹¤ë¤³¤È¤Ë¤Ê¤ê¤Þ¤¹¡£
+    ¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï¡¢¤³¤Î;·×¤Ê£±¥Ð¥¤¥È(filler byte)¤Ë¤Ï 0x00 ¤¬Ëä¤á¤é¤ì¤Æ
+    ½ÐÎϤµ¤ì¤Þ¤¹¤¬¡¢¤³¤Î RGBX_FILLER_0XFF ¤ò #define ¤¹¤ë¤È 0x00 ¤ÎÂå¤ï¤ê
+    ¤Ë 0xFF ¤¬ filler byte ¤ËËä¤á¤é¤ì¤Æ½ÐÎϤµ¤ì¤Þ¤¹¡£
+
+    ½ÐÎϤò 32bit/pixel ·Á¼°¤Ë¤·¤¿¾ì¹ç¤Ç¡¢filler byte ¤ò¥¢¥ë¥Õ¥¡¥Á¥ã¥Í¥ë
+    ¤È¤·¤Æ°·¤¤¤¿¤¤¾ì¹ç¤Ê¤É¤Ï¡¢RGBX_FILLER_0XFF ¤ò #define ¤¹¤ë¤ÈÅԹ礬Îɤ¤
+    ¾ì¹ç¤¬¤¢¤ë¤Ç¤·¤ç¤¦¡£
+
+    ¤Ê¤ª¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠJPEG ¥é¥¤¥Ö¥é¥ê¤Ç¤Ï¡¢¤³¤Î filler byte ¤Ë¤Ï²¿¤â
+    µÍ¤á¤é¤ì¤º¡¢¸µ¤ÎÃͤ¬¤½¤Î¤Þ¤ÞÊÝ»ý¤µ¤ì¤Þ¤¹¡£¤Ç¤¹¤¬¡¢SIMD ÈǤο§¶õ´ÖÊÑ´¹
+    ¥ë¡¼¥Á¥ó¤Ç¤Ï¸µ¤ÎÃͤòÊÝ»ý¤¹¤ë¤Ë¤Ï¼ê´Ö¤¬¤«¤«¤ë¤¿¤á¡¢¾ï¤Ë 0x00 ¤« 0xFF
+    ¤ÇËä¤á¤Æ½ÐÎϤ¹¤ë¤è¤¦¤Ë»ÅÍÍÊѹ¹¤ò¹Ô¤Ê¤¤¤Þ¤·¤¿¡£
+
+  ○ JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+    ¤³¤ì¤Ï¡¢jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¡¢°µ½Ì½èÍý¤Ç¤Î DCT·¸¿ô¤Î
+    Î̻Ҳ½½èÍý¤ÎÊýË¡¤òÊѹ¹¤·¤Þ¤¹¡£¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï #undef ¤Ç¡¢#undef ¤Î¾õÂÖ¤Î
+    Êý¤¬¹â®¤Ê¤Î¤Ç¡¢ÆÃ¤ËÍýͳ¤Î¤Ê¤¤¸Â¤ê #undef ¤Ç»ÈÍѤ¹¤ë¤³¤È¤ò¤ªÁ¦¤á¤¤¤¿¤·
+    ¤Þ¤¹¡£
+
+    DCT·¸¿ô¤ÎÎ̻Ҳ½½èÍý¤È¤¤¤¦¤Î¤Ï¡¢Ã¼Åª¤Ë¸À¤¨¤Ð²èÁü¥Ç¡¼¥¿¤ËÂФ·¤Æ°ì¤Ä°ì¤Ä
+    ³ä¤ê»»(À°¿ô½ü»»)¤ò¼Â¹Ô¤¹¤ë¤³¤È¤Ç¤¹¡£¤Ç¤¹¤¬¡¢½ü»»¤Ï¸¶ÍýŪ¤Ë¹â®²½¤¬
+    ÉÔ²Äǽ¤Ê¤Î¤Ç¡¢¤³¤Î SIMD ³ÈÄ¥ÈǤǤÏÀ°¿ô½ü»»¤ÎÂå¤ï¤ê¤ËÀ°¿ô¾è»»¤ò»ÈÍѤ·¤Æ
+    Î̻Ҳ½½èÍý¤ò¼Â¹Ô¤·¤Æ¤¤¤Þ¤¹¡£
+
+    ¤³¤ÎÀ°¿ô¾è»»¤òÂåÍѤ¹¤ëÊýË¡¤Ç¤â¡¢¹âÀºÅÙÀ°¿ôDCT/¹â®À°¿ôDCT¤ò»È¤Ã¤¿¾ì¹ç¤Ç¡¢
+    ¤«¤Ä¡¢0¡Á100 ¤Î¤¹¤Ù¤Æ¤Î°µ½Ì¥¯¥ª¥ê¥Æ¥£ÀßÄê¤Ç¥ª¥ê¥¸¥Ê¥ëÈǤÈÁ´¤¯Æ±¤¸·ë²Ì¤ò
+    ½Ð¤¹¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£¤Ç¤¹¤¬¡¢°µ½Ì²è¼Á¤ò "¥¯¥ª¥ê¥Æ¥£" ¤Î»ØÉ¸¤ÇÀßÄê
+    ¤»¤º¡¢¥¯¥ª¥ê¥Æ¥£ 0 ¤è¤ê¤âÄã²è¼Á¤ÎÎ̻Ҳ½¥Æ¡¼¥Ö¥ë¤òľÀÜÍ¿¤¨¤Æ°µ½Ì¤·¤¿¾ì¹ç
+    ¤Ê¤É¤Ï¡¢±é»»ÅÓÃæ¤Î¿ôÃÍÈϰϤδط¸¤Ç¡¢¥ª¥ê¥¸¥Ê¥ëÈǤȤϰۤʤë·ë²Ì¤¬½Ð¤ë¤³¤È
+    ¤âÈÝÄê¤Ç¤­¤Þ¤»¤ó¡£¤½¤¦¤¤¤Ã¤¿ÆÃ¼ì¤Ê¶­³¦¾ò·ï²¼¤Ç¤â¥ª¥ê¥¸¥Ê¥ëÈǤȤθߴ¹ÅÙ¤¬
+    ¹â¤¯¤Ê¤é¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¾ì¹ç¤Ê¤É¤Ë¤Ï¡¢¤³¤Î¹àÌܤò #define ¤Ë¤·¤Æ»ÈÍѤ·¤Æ
+    ¤¯¤À¤µ¤¤¡£¼ã´³Â®ÅÙ¤ÏÍî¤Á¤Þ¤¹¤¬¡¢½¾Íè¤É¤ª¤ê¡¢°ì¤Ä°ì¤Ä½ü»»¤ò¹Ô¤Ã¤ÆÎ̻Ҳ½
+    ½èÍý¤ò¹Ô¤¤¤Þ¤¹¡£
+
+    ¤â¤Ã¤È¤â¡¢¥¯¥ª¥ê¥Æ¥£ 0 ¤è¤ê¤âÄã²è¼Á¤ÎÀßÄê¤Ç°µ½Ì¤·¤Æ¤â¡¢¤Û¤È¤ó¤É¼ÂÍѤË
+    ¤Ê¤ê¤Þ¤»¤ó¤Î¤Ç¡¢¤³¤ÎÀ°¿ô¾è»»¤òÂåÍѤ¹¤ëÊýË¡¤Ç¤â¡¢ÌäÂê¤Ë¤Ê¤ë¤³¤È¤Ï¤Ê¤¤¤È
+    »×¤¤¤Þ¤¹¡£
+
+  ○ UPSAMPLE_H1V2_SUPPORTED
+
+    ¤³¤ì¤Ï¡¢jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ë¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG library ¤Ë¤Ï
+    Â¸ºß¤·¤Ê¤¤ÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¤¹¡£¤³¤ì¤Ï¡¢Y:1x2 Cb:1x1 Cr:1x1 (4:2:2) ¤Î
+    ¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ¤ò»ý¤Ä JPEG ¥Õ¥¡¥¤¥ë¤ò¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG
+    library ¤è¤ê¤â¹â®¡¿¹â²è¼Á¤ËŸ³«¤Ç¤­¤ë¤è¤¦¤Ë¤¹¤ë¤â¤Î¤Ç¤¹¡£
+
+    ¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:1x2 Cb:1x1 Cr:1x1 (4:2:2) ¤ò»ý¤Ä JPEG ¥Õ¥¡¥¤¥ë¤Ï¡¢
+    ¥ª¥ê¥¸¥Ê¥ë¤Î IJG JPEG Library ¤Ç¤âŸ³«¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤Î¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°
+    Èæ¤ËÂбþ¤¹¤ë¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°¡¦¥ë¡¼¥Á¥ó¤¬´Êñ¤Ê¤â¤Î¤·¤«ÍѰդµ¤ì¤Æ
+    ¤¤¤Ê¤¤¤¿¤á¡¢Å¸³«Â®ÅÙ¤âÃÙ¤¯¡¢¤Þ¤¿¡¢¿§¤Î¶­Ìܤ¬¤Ï¤Ã¤­¤ê¤·¤Æ¤¤¤ë£Ã£Ç²èÁü
+    ¤Ê¤É¤Î¾ì¹ç¤Ï¥¸¥ã¥®¡¼¤¬ÌÜΩ¤Ã¤Æ¤·¤Þ¤¦¤³¤È¤¬¤¢¤ê¤Þ¤¹¡£¤³¤Î¹àÌܤòÍ­¸ú¤Ë
+    ¤¹¤ë¤³¤È¤Ç¡¢¤³¤Î¤è¤¦¤Ê¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:1x2 Cb:1x1 Cr:1x1 ¤ò»ý¤Ä
+    JPEG ¥Õ¥¡¥¤¥ë¤ò¹â®¤Ë¡¢¤Þ¤¿¡¢¥¸¥ã¥®¡¼¤¬ÌÜΩ¤¿¤Ê¤¤¤è¤¦¤Ë¹â²è¼Á¤ËŸ³«
+    ¤Ç¤­¤ë¤è¤¦¤Ë¤·¤Þ¤¹¡£
+
+    ¤³¤Î¡¢¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:1x2 Cb:1x1 Cr:1x1 ¤Î JPEG ¥Õ¥¡¥¤¥ë¤Ï¡¢¤¢¤Þ¤ê
+    °ìÈÌŪ¤Ê¤â¤Î¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¤¬¡¢¥Ç¥£¥¸¥¿¥ë¥«¥á¥é¤Ê¤É¤¬½ÐÎϤ¹¤ë¤³¤È¤Î¿¤¤¡¢
+    ¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:2x1 Cb:1x1 Cr:1x1 (4:2:2) ¤Î JPEG ¥Õ¥¡¥¤¥ë¤ËÂФ·¤Æ
+    ¡ÖJPEG ¥í¥¹¥ì¥¹²óž¡×½èÍý¤ò¹Ô¤Ê¤¦¤È¡¢¤³¤Î Y:1x2 Cb:1x1 Cr:1x1 ¤Î JPEG
+    ¥Õ¥¡¥¤¥ë¤Ë¤Ê¤ê¤Þ¤¹¡£¥Ç¥£¥¸¥¿¥ë¥«¥á¥é¤Ç¡¢¥«¥á¥é¤ò½Ä¤Ë¤·¤Æ¡Ê½Ä°ÌÃ֤ǡ˻£±Æ
+    ¤·¤¿²èÁü¤ò¡ÖJPEG ¥í¥¹¥ì¥¹²óž¡×¤·¤ÆÀµ¾ï¤Ê¸þ¤­¤Ëľ¤¹¡¢¤Ê¤É¤È¤¤¤¦¤³¤È¤Ï¡¢
+    ¤è¤¯¤ä¤ë¤³¤È¤À¤È»×¤¤¤Þ¤¹¡£¤Ç¤¹¤¬¡¢¤³¤Î¤è¤¦¤Ê¡ÖJPEG ¥í¥¹¥ì¥¹²óž¡×¤µ¤ì¤¿
+    JPEG ¥Õ¥¡¥¤¥ë¤ò¥ª¥ê¥¸¥Ê¥ë¤Î IJG JPEG Library ¤ÇŸ³«¤¹¤ë¤È¡¢¾åµ­¤ÎÍýͳ¤«¤é¡¢
+    JPEG ¥Õ¥¡¥¤¥ë¤òŸ³«¤·¤Æ¤«¤é²èÁü½èÍý¥½¥Õ¥È¤Ç²óž¤µ¤»¤¿²èÁü¤ËÈæ¤Ù¤Æ²è¼Á¤¬
+    Îô¤Ã¤Æ¤·¤Þ¤¤¤Þ¤¹¡£¤³¤Î¹àÌܤòÍ­¸ú¤Ë¤¹¤ë¤³¤È¤Ç¡¢²èÁü½èÍý¥½¥Õ¥È¤Ç²óž¤µ¤»¤¿
+    ²èÁü¤È¤Û¤ÜƱ¤¸¥¯¥ª¥ê¥Æ¥£¤Ç²èÁü¤òŸ³«¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ê¤Þ¤¹¡£
+
+    ¤³¤Î¹àÌܤϡ¢ÆÃ¤ËÍýͳ¤Î¤Ê¤¤¸Â¤ê¡¢#define ¤Î¾õÂ֤ˤ·¤Æ¤ª¤¯¤³¤È¤ò¤ªÁ¦¤á
+    ¤¤¤¿¤·¤Þ¤¹¡£¥ª¥ê¥¸¥Ê¥ë¤Î IJG JPEG Library ¤È´°Á´¤ËƱ°ì¤Î·ë²Ì¤¬É¬ÍפÊ
+    ¾ì¹ç¤Î¤ß #undef ¤Ë¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+
+
+[EOF]
similarity index 100%
rename from jconfig.bcc
rename to unused/jconfig.bcc
similarity index 100%
rename from jconfig.mac
rename to unused/jconfig.mac
similarity index 100%
rename from jconfig.manx
rename to unused/jconfig.manx
similarity index 100%
rename from jconfig.mc6
rename to unused/jconfig.mc6
similarity index 100%
rename from jconfig.sas
rename to unused/jconfig.sas
similarity index 100%
rename from jconfig.st
rename to unused/jconfig.st
similarity index 100%
rename from jconfig.vms
rename to unused/jconfig.vms
similarity index 100%
rename from jconfig.wat
rename to unused/jconfig.wat
similarity index 100%
rename from jfdctflt.c
rename to unused/jfdctflt.c
similarity index 100%
rename from jfdctfst.c
rename to unused/jfdctfst.c
similarity index 100%
rename from jfdctint.c
rename to unused/jfdctint.c
similarity index 100%
rename from jidctflt.c
rename to unused/jidctflt.c
similarity index 100%
rename from jidctfst.c
rename to unused/jidctfst.c
similarity index 100%
rename from jidctint.c
rename to unused/jidctint.c
similarity index 100%
rename from jidctred.c
rename to unused/jidctred.c
similarity index 100%
rename from jmemdos.c
rename to unused/jmemdos.c
similarity index 100%
rename from jmemdosa.asm
rename to unused/jmemdosa.asm
similarity index 100%
rename from jmemmac.c
rename to unused/jmemmac.c
similarity index 100%
rename from makcjpeg.st
rename to unused/makcjpeg.st
similarity index 100%
rename from makdjpeg.st
rename to unused/makdjpeg.st
similarity index 100%
rename from makeapps.ds
rename to unused/makeapps.ds
similarity index 100%
rename from makefile.bcc
rename to unused/makefile.bcc
similarity index 100%
rename from makefile.manx
rename to unused/makefile.manx
similarity index 100%
rename from makefile.mc6
rename to unused/makefile.mc6
similarity index 100%
rename from makefile.mms
rename to unused/makefile.mms
similarity index 100%
rename from makefile.sas
rename to unused/makefile.sas
similarity index 100%
rename from makefile.vms
rename to unused/makefile.vms
similarity index 100%
rename from makefile.wat
rename to unused/makefile.wat
similarity index 100%
rename from makelib.ds
rename to unused/makelib.ds
similarity index 100%
rename from makeproj.mac
rename to unused/makeproj.mac
similarity index 100%
rename from makljpeg.st
rename to unused/makljpeg.st
similarity index 100%
rename from maktjpeg.st
rename to unused/maktjpeg.st
similarity index 100%
rename from makvms.opt
rename to unused/makvms.opt
diff --git a/unused/rdgif.c b/unused/rdgif.c
new file mode 100644 (file)
index 0000000..b27c167
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * rdgif.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to read input images in GIF format.
+ *
+ *****************************************************************************
+ * NOTE: to avoid entanglements with Unisys' patent on LZW compression,      *
+ * the ability to read GIF files has been removed from the IJG distribution. *
+ * Sorry about that.                                                         *
+ *****************************************************************************
+ *
+ * We are required to state that
+ *    "The Graphics Interchange Format(c) is the Copyright property of
+ *    CompuServe Incorporated. GIF(sm) is a Service Mark property of
+ *    CompuServe Incorporated."
+ */
+
+#include "cdjpeg.h"            /* Common decls for cjpeg/djpeg applications */
+
+#ifdef GIF_SUPPORTED
+
+/*
+ * Module selection stub for GIF input: prints a notice and exits with failure.
+ */
+
+GLOBAL(cjpeg_source_ptr)
+jinit_read_gif (j_compress_ptr cinfo)
+{
+  fputs("GIF input is unsupported for legal reasons.  Sorry.\n", stderr);
+  exit(EXIT_FAILURE);
+  return NULL;                 /* unreachable; satisfies the return type */
+}
+
+#endif /* GIF_SUPPORTED */
diff --git a/unused/wrgif.c b/unused/wrgif.c
new file mode 100644 (file)
index 0000000..5fe8328
--- /dev/null
@@ -0,0 +1,399 @@
+/*
+ * wrgif.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to write output images in GIF format.
+ *
+ **************************************************************************
+ * NOTE: to avoid entanglements with Unisys' patent on LZW compression,   *
+ * this code has been modified to output "uncompressed GIF" files.        *
+ * There is no trace of the LZW algorithm in this file.                   *
+ **************************************************************************
+ *
+ * These routines may need modification for non-Unix environments or
+ * specialized applications.  As they stand, they assume output to
+ * an ordinary stdio stream.
+ */
+
+/*
+ * This code is loosely based on ppmtogif from the PBMPLUS distribution
+ * of Feb. 1991.  That file contains the following copyright notice:
+ *    Based on GIFENCODE by David Rowley <mgardi@watdscu.waterloo.edu>.
+ *    Lempel-Ziv compression based on "compress" by Spencer W. Thomas et al.
+ *    Copyright (C) 1989 by Jef Poskanzer.
+ *    Permission to use, copy, modify, and distribute this software and its
+ *    documentation for any purpose and without fee is hereby granted, provided
+ *    that the above copyright notice appear in all copies and that both that
+ *    copyright notice and this permission notice appear in supporting
+ *    documentation.  This software is provided "as is" without express or
+ *    implied warranty.
+ *
+ * We are also required to state that
+ *    "The Graphics Interchange Format(c) is the Copyright property of
+ *    CompuServe Incorporated. GIF(sm) is a Service Mark property of
+ *    CompuServe Incorporated."
+ */
+
+#include "cdjpeg.h"            /* Common decls for cjpeg/djpeg applications */
+
+#ifdef GIF_SUPPORTED
+
+
+/* Private GIF writer state; the routines below cast djpeg_dest_ptr to it */
+
+typedef struct {
+  struct djpeg_dest_struct pub;        /* public fields (first member) */
+
+  j_decompress_ptr cinfo;      /* back link saves passing separate parm */
+
+  /* State for packing fixed-width codes into a byte stream */
+  int n_bits;                  /* width in bits of every emitted code */
+  int maxcode;                 /* MAXCODE(n_bits): largest code of that width */
+  INT32 cur_accum;             /* pending bits, filled from the LSB upward */
+  int cur_bits;                        /* # of valid bits in cur_accum */
+
+  /* State for GIF code assignment */
+  int ClearCode;               /* 1 << (n_bits-1); set once in compress_init */
+  int EOFCode;                 /* ClearCode + 1 */
+  int code_counter;            /* starts at ClearCode+2, counts output symbols */
+
+  /* GIF data packet construction buffer */
+  int bytesinpkt;              /* # of data bytes queued in packetbuf */
+  char packetbuf[256];         /* [0] = count byte, [1..255] = packet data */
+
+} gif_dest_struct;
+
+typedef gif_dest_struct * gif_dest_ptr;
+
+/* Largest code representable in n_bits bits, i.e. (2 to the n_bits) - 1 */
+#define MAXCODE(n_bits)        ((1 << (n_bits)) - 1)
+
+
+/*
+ * Routines to package finished data bytes into GIF data blocks.
+ * A data block consists of a count byte (1..255) and that many data bytes.
+ */
+
+LOCAL(void)
+flush_packet (gif_dest_ptr dinfo)
+/* Write the pending packet (count byte + data), if any, and reset it */
+{
+  size_t total;                        /* count byte plus data bytes */
+  if (dinfo->bytesinpkt <= 0) return;  /* never write zero-length packet */
+  dinfo->packetbuf[0] = (char) dinfo->bytesinpkt;      /* leading count byte */
+  total = (size_t) ++dinfo->bytesinpkt;
+  if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, total) != total)
+    ERREXIT(dinfo->cinfo, JERR_FILE_WRITE);
+  dinfo->bytesinpkt = 0;
+}
+
+
+/* Store byte c at packetbuf[++bytesinpkt]; flush once 255 bytes are queued */
+#define CHAR_OUT(dinfo,c)  \
+       { (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c);  \
+           if ((dinfo)->bytesinpkt >= 255)  \
+             flush_packet(dinfo);  \
+       }
+
+
+/* Routine to convert variable-width codes into a byte stream */
+
+LOCAL(void)
+output (gif_dest_ptr dinfo, int code)
+/* Append an n_bits-wide code above the bits already pending, then */
+/* move every complete 8-bit byte (low byte first) into the packet buffer */
+{
+  INT32 shifted = ((INT32) code) << dinfo->cur_bits;
+
+  dinfo->cur_accum = dinfo->cur_accum | shifted;
+  dinfo->cur_bits = dinfo->cur_bits + dinfo->n_bits;
+  for (; dinfo->cur_bits >= 8; dinfo->cur_bits -= 8) {
+    CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF);
+    dinfo->cur_accum >>= 8;
+  }
+}
+
+
+/* The pseudo-compression algorithm.
+ *
+ * In this module we simply output each pixel value as a separate symbol;
+ * thus, no compression occurs.  In fact, there is expansion of one bit per
+ * pixel, because we use a symbol width one bit wider than the pixel width.
+ *
+ * GIF ordinarily uses variable-width symbols, and the decoder will expect
+ * to ratchet up the symbol width after a fixed number of symbols.
+ * To simplify the logic and keep the expansion penalty down, we emit a
+ * GIF Clear code to reset the decoder just before the width would ratchet up.
+ * Thus, all the symbols in the output file will have the same bit width.
+ * Note that emitting the Clear codes at the right times is a mere matter of
+ * counting output symbols and is in no way dependent on the LZW patent.
+ *
+ * With a small basic pixel width (low color count), Clear codes will be
+ * needed very frequently, causing the file to expand even more.  So this
+ * simplistic approach wouldn't work too well on bilevel images, for example.
+ * But for output of JPEG conversions the pixel width will usually be 8 bits
+ * (129 to 256 colors), so the overhead added by Clear symbols is only about
+ * one symbol in every 256.
+ */
+
+LOCAL(void)
+compress_init (gif_dest_ptr dinfo, int i_bits)
+/* Reset the pseudo-compressor for i_bits-wide symbols and emit a Clear code */
+{
+  int clear_code = (1 << (i_bits - 1));
+  /* start with an empty bit accumulator and packet buffer */
+  dinfo->cur_bits = 0;
+  dinfo->cur_accum = 0;
+  dinfo->bytesinpkt = 0;
+  /* symbol width and the largest code that fits in it */
+  dinfo->n_bits = i_bits;
+  dinfo->maxcode = MAXCODE(i_bits);
+  /* reserved codes; the symbol counter starts just past them */
+  dinfo->ClearCode = clear_code;
+  dinfo->EOFCode = clear_code + 1;
+  dinfo->code_counter = clear_code + 2;
+  output(dinfo, clear_code);   /* GIF specifies an initial Clear code */
+}
+
+
+LOCAL(void)
+compress_pixel (gif_dest_ptr dinfo, int c)
+/* Emit pixel value c verbatim as one symbol (no real compression).
+ * The given value must be less than n_bits wide.
+ */
+{
+  output(dinfo, c);
+  /* Once the symbol count has reached maxcode, send a Clear code so the
+   * reader never ratchets up its symbol size, and restart the count.
+   */
+  if (dinfo->code_counter >= dinfo->maxcode) {
+    output(dinfo, dinfo->ClearCode);
+    dinfo->code_counter = dinfo->ClearCode + 2;
+    return;
+  }
+  /* Otherwise just count the symbol that was written. */
+  dinfo->code_counter += 1;
+}
+
+
+LOCAL(void)
+compress_term (gif_dest_ptr dinfo)
+/* Finish the code stream: EOF code, leftover bits, then the last packet */
+{
+  int leftover_bits;
+  output(dinfo, dinfo->EOFCode);       /* send an EOF code */
+  leftover_bits = dinfo->cur_bits;
+  if (leftover_bits > 0) {             /* a partial byte is still pending */
+    CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF);
+  }
+  /* Write out whatever is queued in the packet buffer */
+  flush_packet(dinfo);
+}
+
+
+/* GIF header construction */
+
+
+LOCAL(void)
+put_word (gif_dest_ptr dinfo, unsigned int w)
+/* Write the low 16 bits of w as two bytes, least significant byte first */
+{
+  putc((int) (w & 0x00FF), dinfo->pub.output_file);
+  putc((int) ((w & 0xFF00) >> 8), dinfo->pub.output_file);
+}
+
+
+LOCAL(void)
+put_3bytes (gif_dest_ptr dinfo, int val)
+/* Write the byte val three times; used for gray and padding colormap entries */
+{
+  int copies;
+  for (copies = 3; copies > 0; copies--)
+    putc(val, dinfo->pub.output_file);
+}
+
+
+LOCAL(void)
+emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap)
+/* Write the GIF87a signature, screen descriptor, global color map, image */
+/* descriptor and code-size byte.  colormap==NULL selects a gray-scale map. */
+{
+  int BitsPerPixel, ColorMapSize, InitCodeSize, FlagByte;
+  int cshift = dinfo->cinfo->data_precision - 8;  /* shift down to 8 bits */
+  int i;
+
+  if (num_colors > 256)
+    ERREXIT1(dinfo->cinfo, JERR_TOO_MANY_COLORS, num_colors);
+  /* Smallest BitsPerPixel >= 1 such that (1 << BitsPerPixel) >= num_colors */
+  BitsPerPixel = 1;
+  while (num_colors > (1 << BitsPerPixel))
+    BitsPerPixel++;
+  ColorMapSize = 1 << BitsPerPixel;
+  if (BitsPerPixel <= 1)
+    InitCodeSize = 2;
+  else
+    InitCodeSize = BitsPerPixel;
+  /*
+   * Write the GIF header.
+   * Note that we generate a plain GIF87 header for maximum compatibility.
+   */
+  putc('G', dinfo->pub.output_file);
+  putc('I', dinfo->pub.output_file);
+  putc('F', dinfo->pub.output_file);
+  putc('8', dinfo->pub.output_file);
+  putc('7', dinfo->pub.output_file);
+  putc('a', dinfo->pub.output_file);
+  /* Write the Logical Screen Descriptor */
+  put_word(dinfo, (unsigned int) dinfo->cinfo->output_width);
+  put_word(dinfo, (unsigned int) dinfo->cinfo->output_height);
+  FlagByte = 0x80;             /* Yes, there is a global color table */
+  FlagByte |= (BitsPerPixel-1) << 4; /* color resolution */
+  FlagByte |= (BitsPerPixel-1);        /* size of global color table */
+  putc(FlagByte, dinfo->pub.output_file);
+  putc(0, dinfo->pub.output_file); /* Background color index */
+  putc(0, dinfo->pub.output_file); /* Reserved (aspect ratio in GIF89) */
+  /* Write the Global Color Map */
+  /* If the color map is more than 8 bits precision, */
+  /* we reduce it to 8 bits by shifting */
+  for (i=0; i < ColorMapSize; i++) {
+    if (i < num_colors) {
+      if (colormap != NULL) {
+       if (dinfo->cinfo->out_color_space == JCS_RGB) {
+         /* Normal case: RGB color map */
+         putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file);
+         putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file);
+         putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file);
+       } else {
+         /* Grayscale "color map": possible if quantizing grayscale image */
+         put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift);
+       }
+      } else {
+       /* Rounded gray ramp over 0..255; divisor requires num_colors > 1 */
+       put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1));
+      }
+    } else {
+      /* fill out the map to a power of 2 */
+      put_3bytes(dinfo, 0);
+    }
+  }
+  /* Write image separator and Image Descriptor */
+  putc(',', dinfo->pub.output_file); /* separator */
+  put_word(dinfo, 0);          /* left/top offset */
+  put_word(dinfo, 0);
+  put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); /* image size */
+  put_word(dinfo, (unsigned int) dinfo->cinfo->output_height);
+  /* flag byte: not interlaced, no local color map */
+  putc(0x00, dinfo->pub.output_file);
+  /* Write Initial Code Size byte */
+  putc(InitCodeSize, dinfo->pub.output_file);
+
+  /* Symbols are InitCodeSize+1 bits wide; this also emits the first Clear */
+  compress_init(dinfo, InitCodeSize+1);
+}
+
+
+/*
+ * Startup: write the file header.
+ */
+
+METHODDEF(void)
+start_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo)
+{
+  gif_dest_ptr gif = (gif_dest_ptr) dinfo;
+  /* No quantization: pass a NULL map so emit_header builds 256 gray levels */
+  JSAMPARRAY map = cinfo->quantize_colors ? cinfo->colormap : (JSAMPARRAY) NULL;
+  int ncolors = cinfo->quantize_colors ? cinfo->actual_number_of_colors : 256;
+
+  emit_header(gif, ncolors, map);
+}
+
+
+/*
+ * Write some pixel data.
+ * In this module rows_supplied will always be 1.
+ */
+
+METHODDEF(void)
+put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
+               JDIMENSION rows_supplied)
+{
+  gif_dest_ptr gif = (gif_dest_ptr) dinfo;
+  JSAMPROW row = gif->pub.buffer[0];   /* only the first buffer row is read */
+  JDIMENSION width = cinfo->output_width;
+  JDIMENSION x;
+
+  for (x = 0; x < width; x++) {
+    compress_pixel(gif, GETJSAMPLE(row[x]));
+  }
+}
+
+
+/*
+ * Finish up at the end of the file.
+ */
+
+METHODDEF(void)
+finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo)
+{
+  gif_dest_ptr gif = (gif_dest_ptr) dinfo;
+  FILE * out = gif->pub.output_file;
+
+  /* Emit the EOF code and flush any pending bits and packet data */
+  compress_term(gif);
+  /* A zero-length data block ends the series; ';' is the GIF terminator */
+  putc(0, out);
+  putc(';', out);
+  /* Push everything out and fail if the stream recorded a write error */
+  fflush(out);
+  if (ferror(out))
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+}
+
+
+/*
+ * The module selection routine for GIF format output.
+ */
+
+GLOBAL(djpeg_dest_ptr)
+jinit_write_gif (j_decompress_ptr cinfo)
+{
+  j_common_ptr common = (j_common_ptr) cinfo;
+  gif_dest_ptr gif;
+
+  /* Allocate the destination object and hook up its method pointers */
+  gif = (gif_dest_ptr)
+    (*cinfo->mem->alloc_small) (common, JPOOL_IMAGE, SIZEOF(gif_dest_struct));
+  gif->pub.finish_output = finish_output_gif;
+  gif->pub.put_pixel_rows = put_pixel_rows;
+  gif->pub.start_output = start_output_gif;
+  gif->cinfo = cinfo;          /* back link used by the subroutines */
+
+  if (! (cinfo->out_color_space == JCS_GRAYSCALE ||
+        cinfo->out_color_space == JCS_RGB))
+    ERREXIT(cinfo, JERR_GIF_COLORSPACE);
+
+  /* Color output, or samples wider than 8 bits, must be quantized */
+  if (cinfo->data_precision > 8 || cinfo->out_color_space != JCS_GRAYSCALE) {
+    cinfo->quantize_colors = TRUE;
+    /* ... and to no more than 256 colors */
+    if (cinfo->desired_number_of_colors > 256)
+      cinfo->desired_number_of_colors = 256;
+  }
+
+  /* Output dimensions must be known before the row buffer is allocated */
+  jpeg_calc_output_dimensions(cinfo);
+
+  if (cinfo->output_components != 1) /* sanity check: exactly one component */
+    ERREXIT(cinfo, JERR_GIF_BUG);
+
+  /* One-row sample array that receives decompressor output */
+  gif->pub.buffer = (*cinfo->mem->alloc_sarray)
+    (common, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION) 1);
+  gif->pub.buffer_height = 1;
+
+  return (djpeg_dest_ptr) gif;
+}
+
+#endif /* GIF_SUPPORTED */
diff --git a/vc6proj/apptest.dsp b/vc6proj/apptest.dsp
new file mode 100644 (file)
index 0000000..0f5c35b
--- /dev/null
@@ -0,0 +1,242 @@
+# Microsoft Developer Studio Project File - Name="apptest" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Generic Project" 0x010a
+
+CFG=apptest - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "apptest.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "apptest.mak" CFG="apptest - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "apptest - Win32 Release" ("Win32 (x86) Generic Project" 用)
+!MESSAGE "apptest - Win32 Debug" ("Win32 (x86) Generic Project" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+MTL=midl.exe
+
+!IF  "$(CFG)" == "apptest - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+# Begin Special Build Tool
+OutDir=.\Release
+SOURCE="$(InputPath)"
+PostBuild_Cmds=fc /b .\testimg.ppm $(OutDir)\testout.ppm       fc /b .\testimg.bmp $(OutDir)\testout.bmp       fc /b .\testimg.jpg $(OutDir)\testout.jpg       fc /b .\testimg.ppm $(OutDir)\testoutp.ppm      fc /b .\testimgp.jpg $(OutDir)\testoutp.jpg     fc /b .\testorig.jpg $(OutDir)\testoutt.jpg
+# End Special Build Tool
+
+!ELSEIF  "$(CFG)" == "apptest - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Target_Dir ""
+# Begin Special Build Tool
+OutDir=.\Debug
+SOURCE="$(InputPath)"
+PostBuild_Cmds=fc /b .\testimg.ppm $(OutDir)\testout.ppm       fc /b .\testimg.bmp $(OutDir)\testout.bmp       fc /b .\testimg.jpg $(OutDir)\testout.jpg       fc /b .\testimg.ppm $(OutDir)\testoutp.ppm      fc /b .\testimgp.jpg $(OutDir)\testoutp.jpg     fc /b .\testorig.jpg $(OutDir)\testoutt.jpg
+# End Special Build Tool
+
+!ENDIF 
+
+# Begin Target
+
+# Name "apptest - Win32 Release"
+# Name "apptest - Win32 Debug"
+# Begin Group "Test Image Files"
+
+# PROP Default_Filter "*.jpg;*.bmp;*.ppm"
+# Begin Source File
+
+SOURCE=.\testimg.bmp
+# End Source File
+# Begin Source File
+
+SOURCE=.\testimg.jpg
+# End Source File
+# Begin Source File
+
+SOURCE=.\testimg.ppm
+
+!IF  "$(CFG)" == "apptest - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+# Begin Custom Build
+InputDir=.
+OutDir=.\Release
+InputPath=.\testimg.ppm
+
+BuildCmds= \
+       echo $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \
+       $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \
+       echo $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \
+       $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \
+       
+
+"$(OutDir)\testout.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+
+"$(OutDir)\testoutp.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "apptest - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+# Begin Custom Build - Testing - $(InputPath)
+InputDir=.
+OutDir=.\Debug
+InputPath=.\testimg.ppm
+
+BuildCmds= \
+       echo $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \
+       $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \
+       echo $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \
+       $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \
+       
+
+"$(OutDir)\testout.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+
+"$(OutDir)\testoutp.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\testimgp.jpg
+# End Source File
+# Begin Source File
+
+SOURCE=.\testorig.jpg
+
+!IF  "$(CFG)" == "apptest - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+# Begin Custom Build
+InputDir=.
+OutDir=.\Release
+InputPath=.\testorig.jpg
+
+BuildCmds= \
+       echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \
+       $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \
+       echo $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \
+       $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \
+       
+
+"$(OutDir)\testout.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+
+"$(OutDir)\testout.bmp" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "apptest - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+# Begin Custom Build - Testing - $(InputPath)
+InputDir=.
+OutDir=.\Debug
+InputPath=.\testorig.jpg
+
+BuildCmds= \
+       echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \
+       $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \
+       echo $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \
+       $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \
+       
+
+"$(OutDir)\testout.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+
+"$(OutDir)\testout.bmp" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\testprog.jpg
+
+!IF  "$(CFG)" == "apptest - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+# Begin Custom Build
+InputDir=.
+OutDir=.\Release
+InputPath=.\testprog.jpg
+
+BuildCmds= \
+       echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \
+       $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \
+       echo $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \
+       $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \
+       
+
+"$(OutDir)\testoutp.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+
+"$(OutDir)\testoutt.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "apptest - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+# Begin Custom Build - Testing - $(InputPath)
+InputDir=.
+OutDir=.\Debug
+InputPath=.\testprog.jpg
+
+BuildCmds= \
+       echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \
+       $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \
+       echo $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \
+       $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \
+       
+
+"$(OutDir)\testoutp.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+
+"$(OutDir)\testoutt.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+   $(BuildCmds)
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# End Group
+# End Target
+# End Project
diff --git a/vc6proj/cjpeg.dsp b/vc6proj/cjpeg.dsp
new file mode 100644 (file)
index 0000000..573e619
--- /dev/null
@@ -0,0 +1,164 @@
+# Microsoft Developer Studio Project File - Name="cjpeg" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=cjpeg - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "cjpeg.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "cjpeg.mak" CFG="cjpeg - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "cjpeg - Win32 Release" ("Win32 (x86) Console Application" 用)
+!MESSAGE "cjpeg - Win32 Debug" ("Win32 (x86) Console Application" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "cjpeg - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD BASE RSC /l 0x411 /d "NDEBUG"
+# ADD RSC /l 0x411 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /machine:I386 /libpath:"Release" /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ELSEIF  "$(CFG)" == "cjpeg - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x411 /d "_DEBUG"
+# ADD RSC /l 0x411 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"Debug" /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ENDIF 
+
+# Begin Target
+
+# Name "cjpeg - Win32 Release"
+# Name "cjpeg - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\cdjpeg.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\cjpeg.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdbmp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdgif.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdppm.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdrle.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdswitch.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdtarga.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\cderror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\cdjpeg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jerror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jinclude.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmorecfg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpeglib.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jversion.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/vc6proj/djpeg.dsp b/vc6proj/djpeg.dsp
new file mode 100644 (file)
index 0000000..156b378
--- /dev/null
@@ -0,0 +1,164 @@
+# Microsoft Developer Studio Project File - Name="djpeg" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=djpeg - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "djpeg.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "djpeg.mak" CFG="djpeg - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "djpeg - Win32 Release" ("Win32 (x86) Console Application" 用)
+!MESSAGE "djpeg - Win32 Debug" ("Win32 (x86) Console Application" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "djpeg - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD BASE RSC /l 0x411 /d "NDEBUG"
+# ADD RSC /l 0x411 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /machine:I386 /libpath:"Release" /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ELSEIF  "$(CFG)" == "djpeg - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x411 /d "_DEBUG"
+# ADD RSC /l 0x411 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"Debug" /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ENDIF 
+
+# Begin Target
+
+# Name "djpeg - Win32 Release"
+# Name "djpeg - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\cdjpeg.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\djpeg.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdcolmap.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\wrbmp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\wrgif.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\wrppm.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\wrrle.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\wrtarga.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\cderror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\cdjpeg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jerror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jinclude.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmorecfg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpeglib.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jversion.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/vc6proj/jconfig.h b/vc6proj/jconfig.h
new file mode 100644 (file)
index 0000000..d5bc9f9
--- /dev/null
@@ -0,0 +1,48 @@
+/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. */
+/* see jconfig.doc for explanations */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS       /* we presume a 32-bit flat memory model */
+#undef NEED_SHORT_EXTERNAL_NAMES
+#undef INCOMPLETE_TYPES_BROKEN
+
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#define TYPEDEF_UCHAR_BOOLEAN
+
+#ifdef JPEG_INTERNALS
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
+
+#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) /* per-instruction-set SIMD opt-out switches */
+#undef JSIMD_MMX_NOT_SUPPORTED         /* left undefined; NOTE(review): presumably keeps MMX code enabled -- confirm */
+#undef JSIMD_3DNOW_NOT_SUPPORTED       /* left undefined; NOTE(review): presumably keeps 3DNow! code enabled -- confirm */
+#undef JSIMD_SSE_NOT_SUPPORTED         /* left undefined; NOTE(review): presumably keeps SSE code enabled -- confirm */
+#undef JSIMD_SSE2_NOT_SUPPORTED        /* left undefined; NOTE(review): presumably keeps SSE2 code enabled -- confirm */
+#endif /* JPEG_INTERNALS || JPEG_INTERNAL_OPTIONS */
+
+#ifdef JPEG_CJPEG_DJPEG
+
+#define BMP_SUPPORTED          /* BMP image file format */
+#define GIF_SUPPORTED          /* GIF image file format */
+#define PPM_SUPPORTED          /* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED           /* Utah RLE image file format */
+#define TARGA_SUPPORTED                /* Targa image file format */
+
+#define TWO_FILE_COMMANDLINE   /* optional */
+#define USE_SETMODE            /* Microsoft has setmode() */
+#undef NEED_SIGNAL_CATCHER
+#undef DONT_USE_B_MODE
+#undef PROGRESS_REPORT         /* optional */
+
+#endif /* JPEG_CJPEG_DJPEG */
diff --git a/vc6proj/jpegtran.dsp b/vc6proj/jpegtran.dsp
new file mode 100644 (file)
index 0000000..8dc38d4
--- /dev/null
@@ -0,0 +1,156 @@
+# Microsoft Developer Studio Project File - Name="jpegtran" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=jpegtran - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "jpegtran.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "jpegtran.mak" CFG="jpegtran - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "jpegtran - Win32 Release" ("Win32 (x86) Console Application" 用)
+!MESSAGE "jpegtran - Win32 Debug" ("Win32 (x86) Console Application" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "jpegtran - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD BASE RSC /l 0x411 /d "NDEBUG"
+# ADD RSC /l 0x411 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /machine:I386 /libpath:"Release" /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ELSEIF  "$(CFG)" == "jpegtran - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x411 /d "_DEBUG"
+# ADD RSC /l 0x411 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"Debug" /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ENDIF 
+
+# Begin Target
+
+# Name "jpegtran - Win32 Release"
+# Name "jpegtran - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\cdjpeg.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpegtran.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\rdswitch.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\transupp.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\cderror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\cdjpeg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jerror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jinclude.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmorecfg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpegint.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpeglib.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jversion.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\transupp.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/vc6proj/libjpeg.dsp b/vc6proj/libjpeg.dsp
new file mode 100644 (file)
index 0000000..59647d0
--- /dev/null
@@ -0,0 +1,1751 @@
+# Microsoft Developer Studio Project File - Name="libjpeg" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Static Library" 0x0104
+
+CFG=libjpeg - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "libjpeg.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "libjpeg.mak" CFG="libjpeg - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "libjpeg - Win32 Release" ("Win32 (x86) Static Library" 用)
+!MESSAGE "libjpeg - Win32 Debug" ("Win32 (x86) Static Library" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_LIB" /YX /FD /c
+# ADD CPP /nologo /W3 /O2 /D "WIN32" /D "NDEBUG" /D "_LIB" /YX /Zl /FD /GF /c
+# ADD BASE RSC /l 0x411 /d "NDEBUG"
+# ADD RSC /l 0x411 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LIB32=link.exe -lib
+# ADD BASE LIB32 /nologo
+# ADD LIB32 /nologo
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_LIB" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_LIB" /YX /Zl /FD /GZ /c
+# ADD BASE RSC /l 0x411 /d "_DEBUG"
+# ADD RSC /l 0x411 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LIB32=link.exe -lib
+# ADD BASE LIB32 /nologo
+# ADD LIB32 /nologo
+
+!ENDIF 
+
+# Begin Target
+
+# Name "libjpeg - Win32 Release"
+# Name "libjpeg - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\jcapimin.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcapistd.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jccoefct.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jccolor.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcdctmgr.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jchuff.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcinit.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcmainct.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcmarker.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcmaster.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcomapi.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcparam.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcphuff.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcprepct.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcsample.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jctrans.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdapimin.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdapistd.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdatadst.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdatasrc.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdcoefct.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdcolor.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jddctmgr.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdhuff.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdinput.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdmainct.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdmarker.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdmaster.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdmerge.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdphuff.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdpostct.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdsample.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdtrans.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jerror.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmemmgr.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmemnobs.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jquant1.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jquant2.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\jutils.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\jchuff.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcolsamp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdct.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdhuff.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jerror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jinclude.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmemsys.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmorecfg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpegint.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpeglib.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jversion.h
+# End Source File
+# End Group
+# Begin Group "NASM Source"
+
+# PROP Default_Filter "asm"
+# Begin Source File
+
+SOURCE=.\jccolmmx.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCCOL="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jccolmmx.asm
+InputName=jccolmmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCCOL="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jccolmmx.asm
+InputName=jccolmmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jccolss2.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCCOLS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jccolss2.asm
+InputName=jccolss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCCOLS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jccolss2.asm
+InputName=jccolss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcqnt3dn.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNT="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcqnt3dn.asm
+InputName=jcqnt3dn
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNT="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcqnt3dn.asm
+InputName=jcqnt3dn
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcqntflt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTF="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcqntflt.asm
+InputName=jcqntflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTF="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcqntflt.asm
+InputName=jcqntflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcqntint.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcqntint.asm
+InputName=jcqntint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcqntint.asm
+InputName=jcqntint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcqntmmx.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTM="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcqntmmx.asm
+InputName=jcqntmmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTM="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcqntmmx.asm
+InputName=jcqntmmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcqnts2f.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcqnts2f.asm
+InputName=jcqnts2f
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcqnts2f.asm
+InputName=jcqnts2f
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcqnts2i.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTS2="$(IntDir)\jsimdcfg.inc"      "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcqnts2i.asm
+InputName=jcqnts2i
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTS2="$(IntDir)\jsimdcfg.inc"      "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcqnts2i.asm
+InputName=jcqnts2i
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcqntsse.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTSS="$(IntDir)\jsimdcfg.inc"      "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcqntsse.asm
+InputName=jcqntsse
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCQNTSS="$(IntDir)\jsimdcfg.inc"      "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcqntsse.asm
+InputName=jcqntsse
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcsammmx.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCSAM="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcsammmx.asm
+InputName=jcsammmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCSAM="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcsammmx.asm
+InputName=jcsammmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jcsamss2.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCSAMS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jcsamss2.asm
+InputName=jcsamss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JCSAMS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jcsamss2.asm
+InputName=jcsamss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdcolmmx.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDCOL="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jdcolmmx.asm
+InputName=jdcolmmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDCOL="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jdcolmmx.asm
+InputName=jdcolmmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdcolss2.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDCOLS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jdcolss2.asm
+InputName=jdcolss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDCOLS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jdcolss2.asm
+InputName=jdcolss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdmermmx.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDMER="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jdmermmx.asm
+InputName=jdmermmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDMER="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jdmermmx.asm
+InputName=jdmermmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdmerss2.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDMERS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jdmerss2.asm
+InputName=jdmerss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDMERS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jdmerss2.asm
+InputName=jdmerss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdsammmx.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDSAM="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jdsammmx.asm
+InputName=jdsammmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDSAM="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jdsammmx.asm
+InputName=jdsammmx
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdsamss2.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDSAMS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jdsamss2.asm
+InputName=jdsamss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JDSAMS="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jcolsamp.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jdsamss2.asm
+InputName=jdsamss2
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jf3dnflt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JF3DN="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jf3dnflt.asm
+InputName=jf3dnflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JF3DN="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jf3dnflt.asm
+InputName=jf3dnflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfdctflt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFDCT="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfdctflt.asm
+InputName=jfdctflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFDCT="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfdctflt.asm
+InputName=jfdctflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfdctfst.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFDCTF="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfdctfst.asm
+InputName=jfdctfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFDCTF="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfdctfst.asm
+InputName=jfdctfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfdctint.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFDCTI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfdctint.asm
+InputName=jfdctint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFDCTI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfdctint.asm
+InputName=jfdctint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfmmxfst.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFMMX="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfmmxfst.asm
+InputName=jfmmxfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFMMX="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfmmxfst.asm
+InputName=jfmmxfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfmmxint.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFMMXI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfmmxint.asm
+InputName=jfmmxint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFMMXI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfmmxint.asm
+InputName=jfmmxint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfss2fst.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFSS2="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfss2fst.asm
+InputName=jfss2fst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFSS2="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfss2fst.asm
+InputName=jfss2fst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfss2int.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFSS2I="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfss2int.asm
+InputName=jfss2int
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFSS2I="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfss2int.asm
+InputName=jfss2int
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jfsseflt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFSSE="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jfsseflt.asm
+InputName=jfsseflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JFSSE="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jfsseflt.asm
+InputName=jfsseflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\ji3dnflt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JI3DN="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\ji3dnflt.asm
+InputName=ji3dnflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JI3DN="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\ji3dnflt.asm
+InputName=ji3dnflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jidctflt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCT="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jidctflt.asm
+InputName=jidctflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCT="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jidctflt.asm
+InputName=jidctflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jidctfst.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCTF="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jidctfst.asm
+InputName=jidctfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCTF="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jidctfst.asm
+InputName=jidctfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jidctint.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCTI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jidctint.asm
+InputName=jidctint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCTI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jidctint.asm
+InputName=jidctint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jidctred.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCTR="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jidctred.asm
+InputName=jidctred
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIDCTR="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jidctred.asm
+InputName=jidctred
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jimmxfst.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIMMX="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jimmxfst.asm
+InputName=jimmxfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIMMX="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jimmxfst.asm
+InputName=jimmxfst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jimmxint.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIMMXI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jimmxint.asm
+InputName=jimmxint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIMMXI="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jimmxint.asm
+InputName=jimmxint
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jimmxred.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIMMXR="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jimmxred.asm
+InputName=jimmxred
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JIMMXR="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jimmxred.asm
+InputName=jimmxred
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jiss2flt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jiss2flt.asm
+InputName=jiss2flt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jiss2flt.asm
+InputName=jiss2flt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jiss2fst.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2F="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jiss2fst.asm
+InputName=jiss2fst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2F="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jiss2fst.asm
+InputName=jiss2fst
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jiss2int.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2I="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jiss2int.asm
+InputName=jiss2int
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2I="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jiss2int.asm
+InputName=jiss2int
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jiss2red.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2R="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jiss2red.asm
+InputName=jiss2red
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISS2R="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jiss2red.asm
+InputName=jiss2red
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jisseflt.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISSE="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jisseflt.asm
+InputName=jisseflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JISSE="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  "jdct.inc"      
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jisseflt.asm
+InputName=jisseflt
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jsimdcpu.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JSIMD="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jsimdcpu.asm
+InputName=jsimdcpu
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JSIMD="$(IntDir)\jsimdcfg.inc"        "jsimdext.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jsimdcpu.asm
+InputName=jsimdcpu
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\jsimdw32.asm
+
+!IF  "$(CFG)" == "libjpeg - Win32 Release"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JSIMDW="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Release
+InputPath=.\jsimdw32.asm
+InputName=jsimdw32
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "libjpeg - Win32 Debug"
+
+# PROP Ignore_Default_Tool 1
+USERDEP__JSIMDW="$(IntDir)\jsimdcfg.inc"       "jsimdext.inc"  
+# Begin Custom Build - Assembling - $(InputPath)
+IntDir=.\Debug
+InputPath=.\jsimdw32.asm
+InputName=jsimdw32
+
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath)
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# End Group
+# Begin Group "NASM Header"
+
+# PROP Default_Filter "inc"
+# Begin Source File
+
+SOURCE=.\jcolsamp.inc
+# End Source File
+# Begin Source File
+
+SOURCE=.\jdct.inc
+# End Source File
+# Begin Source File
+
+SOURCE=.\jsimdext.inc
+# End Source File
+# End Group
+# End Target
+# End Project
diff --git a/vc6proj/libjpeg.dsw b/vc6proj/libjpeg.dsw
new file mode 100644 (file)
index 0000000..4ace153
--- /dev/null
@@ -0,0 +1,134 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# 警告: このﾜｰｸｽﾍﾟｰｽ ﾌｧｲﾙ を編集または削除しないでください!
+
+###############################################################################
+
+Project: "apptest"=".\apptest.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+    Begin Project Dependency
+    Project_Dep_Name cjpeg
+    End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name djpeg
+    End Project Dependency
+    Begin Project Dependency
+    Project_Dep_Name jpegtran
+    End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "cjpeg"=".\cjpeg.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+    Begin Project Dependency
+    Project_Dep_Name libjpeg
+    End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "djpeg"=".\djpeg.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+    Begin Project Dependency
+    Project_Dep_Name libjpeg
+    End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "jpegtran"=".\jpegtran.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+    Begin Project Dependency
+    Project_Dep_Name libjpeg
+    End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "libjpeg"=".\libjpeg.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+    Begin Project Dependency
+    Project_Dep_Name makecfg
+    End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "makecfg"=".\makecfg.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Project: "rdjpgcom"=".\rdjpgcom.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Project: "wrjpgcom"=".\wrjpgcom.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/vc6proj/makecfg.dsp b/vc6proj/makecfg.dsp
new file mode 100644 (file)
index 0000000..dbe914a
--- /dev/null
@@ -0,0 +1,142 @@
+# Microsoft Developer Studio Project File - Name="makecfg" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=makecfg - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "makecfg.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "makecfg.mak" CFG="makecfg - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "makecfg - Win32 Release" ("Win32 (x86) Console Application" 用)
+!MESSAGE "makecfg - Win32 Debug" ("Win32 (x86) Console Application" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "makecfg - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD BASE RSC /l 0x411 /d "NDEBUG"
+# ADD RSC /l 0x411 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 /nologo /subsystem:console /machine:I386 /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+# Begin Custom Build - Generating - $(OutDir)\jsimdcfg.inc
+OutDir=.\Release
+InputPath=.\Release\makecfg.exe
+SOURCE="$(InputPath)"
+
+"$(OutDir)\jsimdcfg.inc" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       $(TargetPath) > $(OutDir)\jsimdcfg.inc
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "makecfg - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x411 /d "_DEBUG"
+# ADD RSC /l 0x411 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+# Begin Custom Build - Generating - $(OutDir)\jsimdcfg.inc
+OutDir=.\Debug
+InputPath=.\Debug\makecfg.exe
+SOURCE="$(InputPath)"
+
+"$(OutDir)\jsimdcfg.inc" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+       $(TargetPath) > $(OutDir)\jsimdcfg.inc
+
+# End Custom Build
+
+!ENDIF 
+
+# Begin Target
+
+# Name "makecfg - Win32 Release"
+# Name "makecfg - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\makecfg.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\jconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jerror.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jinclude.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jmorecfg.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpegint.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jpeglib.h
+# End Source File
+# End Group
+# End Target
+# End Project
diff --git a/vc6proj/rdjpgcom.dsp b/vc6proj/rdjpgcom.dsp
new file mode 100644 (file)
index 0000000..7a5eda2
--- /dev/null
@@ -0,0 +1,112 @@
+# Microsoft Developer Studio Project File - Name="rdjpgcom" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=rdjpgcom - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "rdjpgcom.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "rdjpgcom.mak" CFG="rdjpgcom - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "rdjpgcom - Win32 Release" ("Win32 (x86) Console Application" 用)
+!MESSAGE "rdjpgcom - Win32 Debug" ("Win32 (x86) Console Application" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "rdjpgcom - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD BASE RSC /l 0x411 /d "NDEBUG"
+# ADD RSC /l 0x411 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 kernel32.lib /nologo /subsystem:console /machine:I386 /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ELSEIF  "$(CFG)" == "rdjpgcom - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x411 /d "_DEBUG"
+# ADD RSC /l 0x411 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ENDIF 
+
+# Begin Target
+
+# Name "rdjpgcom - Win32 Release"
+# Name "rdjpgcom - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\rdjpgcom.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\jconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jinclude.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/vc6proj/wrjpgcom.dsp b/vc6proj/wrjpgcom.dsp
new file mode 100644 (file)
index 0000000..7fdf9ec
--- /dev/null
@@ -0,0 +1,112 @@
+# Microsoft Developer Studio Project File - Name="wrjpgcom" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** 編集しないでください **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=wrjpgcom - Win32 Debug
+!MESSAGE これは有効なﾒｲｸﾌｧｲﾙではありません。 このﾌﾟﾛｼﾞｪｸﾄをﾋﾞﾙﾄﾞするためには NMAKE を使用してください。
+!MESSAGE [ﾒｲｸﾌｧｲﾙのｴｸｽﾎﾟｰﾄ] ｺﾏﾝﾄﾞを使用して実行してください
+!MESSAGE 
+!MESSAGE NMAKE /f "wrjpgcom.mak".
+!MESSAGE 
+!MESSAGE NMAKE の実行時に構成を指定できます
+!MESSAGE ｺﾏﾝﾄﾞ ﾗｲﾝ上でﾏｸﾛの設定を定義します。例:
+!MESSAGE 
+!MESSAGE NMAKE /f "wrjpgcom.mak" CFG="wrjpgcom - Win32 Debug"
+!MESSAGE 
+!MESSAGE 選択可能なﾋﾞﾙﾄﾞ ﾓｰﾄﾞ:
+!MESSAGE 
+!MESSAGE "wrjpgcom - Win32 Release" ("Win32 (x86) Console Application" 用)
+!MESSAGE "wrjpgcom - Win32 Debug" ("Win32 (x86) Console Application" 用)
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "wrjpgcom - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
+# ADD BASE RSC /l 0x411 /d "NDEBUG"
+# ADD RSC /l 0x411 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 kernel32.lib /nologo /subsystem:console /machine:I386 /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ELSEIF  "$(CFG)" == "wrjpgcom - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x411 /d "_DEBUG"
+# ADD RSC /l 0x411 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /opt:nowin98
+# SUBTRACT LINK32 /pdb:none
+
+!ENDIF 
+
+# Begin Target
+
+# Name "wrjpgcom - Win32 Release"
+# Name "wrjpgcom - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\wrjpgcom.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\jconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\jinclude.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/wrbmp.c b/wrbmp.c
index 3283b0f15c20ae86b64bc2d021b4f9760cb9de3e..517441a3d5ff80d583eb8e978985e87753b80419 100644 (file)
--- a/wrbmp.c
+++ b/wrbmp.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified to improve performance.
+ * Last Modified : October 19, 2004
+ * ---------------------------------------------------------------------
+ *
  * This file contains routines to write output images in Microsoft "BMP"
  * format (MS Windows 3.x and OS/2 1.x flavors).
  * Either 8-bit colormapped or 24-bit full-color format can be written.
@@ -346,9 +353,11 @@ finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo)
   bmp_dest_ptr dest = (bmp_dest_ptr) dinfo;
   register FILE * outfile = dest->pub.output_file;
   JSAMPARRAY image_ptr;
+#if (BITS_IN_JSAMPLE != 8) || defined(NEED_FAR_POINTERS)
   register JSAMPROW data_ptr;
-  JDIMENSION row;
   register JDIMENSION col;
+#endif
+  JDIMENSION row;
   cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress;
 
   /* Write the header and colormap */
@@ -366,11 +375,17 @@ finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo)
     }
     image_ptr = (*cinfo->mem->access_virt_sarray)
       ((j_common_ptr) cinfo, dest->whole_image, row-1, (JDIMENSION) 1, FALSE);
+#if (BITS_IN_JSAMPLE == 8) && !defined(NEED_FAR_POINTERS)
+    if (JFWRITE(outfile, image_ptr[0], dest->row_width)
+       != (size_t) dest->row_width)
+      ERREXIT(cinfo, JERR_FILE_WRITE);
+#else
     data_ptr = image_ptr[0];
     for (col = dest->row_width; col > 0; col--) {
       putc(GETJSAMPLE(*data_ptr), outfile);
       data_ptr++;
     }
+#endif
   }
   if (progress != NULL)
     progress->completed_extra_passes++;
diff --git a/wrgif.c b/wrgif.c
index 5fe8328391973ee0276cff75ae7fb07e6cc6cd44..85cfaa8bd7391f96a39ac5ccf862e7abf32ba44e 100644 (file)
--- a/wrgif.c
+++ b/wrgif.c
@@ -1,18 +1,17 @@
 /*
  * wrgif.c
  *
- * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Copyright (C) 1991-1996, Thomas G. Lane.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
- * This file contains routines to write output images in GIF format.
- *
  **************************************************************************
- * NOTE: to avoid entanglements with Unisys' patent on LZW compression,   *
- * this code has been modified to output "uncompressed GIF" files.        *
- * There is no trace of the LZW algorithm in this file.                   *
+ * WARNING: You will need an LZW patent license from Unisys in order to   *
+ * use this file legally in any commercial or shareware application.      *
  **************************************************************************
  *
+ * This file contains routines to write output images in GIF format.
+ *
  * These routines may need modification for non-Unix environments or
  * specialized applications.  As they stand, they assume output to
  * an ordinary stdio stream.
 #ifdef GIF_SUPPORTED
 
 
+#define        MAX_LZW_BITS    12      /* maximum LZW code size (4096 symbols) */
+
+typedef INT16 code_int;                /* must hold -1 .. 2**MAX_LZW_BITS */
+
+#define LZW_TABLE_SIZE ((code_int) 1 << MAX_LZW_BITS)
+
+#define HSIZE          5003    /* hash table size for 80% occupancy */
+
+typedef int hash_int;          /* must hold -2*HSIZE..2*HSIZE */
+
+#define MAXCODE(n_bits)        (((code_int) 1 << (n_bits)) - 1)
+
+
+/*
+ * The LZW hash table consists of two parallel arrays:
+ *   hash_code[i]      code of symbol in slot i, or 0 if empty slot
+ *   hash_value[i]     symbol's value; undefined if empty slot
+ * where slot values (i) range from 0 to HSIZE-1.  The symbol value is
+ * its prefix symbol's code concatenated with its suffix character.
+ *
+ * Algorithm:  use open addressing double hashing (no chaining) on the
+ * prefix code / suffix character combination.  We do a variant of Knuth's
+ * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
+ * secondary probe.
+ *
+ * The hash_value[] table is allocated from FAR heap space since it would
+ * use up rather a lot of the near data space in a PC.
+ */
+
+typedef INT32 hash_entry;      /* must hold (code_int<<8) | byte */
+
+#define HASH_ENTRY(prefix,suffix)  ((((hash_entry) (prefix)) << 8) | (suffix))
+
+
 /* Private version of data destination object */
 
 typedef struct {
@@ -51,14 +84,23 @@ typedef struct {
 
   /* State for packing variable-width codes into a bitstream */
   int n_bits;                  /* current number of bits/code */
-  int maxcode;                 /* maximum code, given n_bits */
+  code_int maxcode;            /* maximum code, given n_bits */
+  int init_bits;               /* initial n_bits ... restored after clear */
   INT32 cur_accum;             /* holds bits not yet output */
   int cur_bits;                        /* # of bits in cur_accum */
 
-  /* State for GIF code assignment */
-  int ClearCode;               /* clear code (doesn't change) */
-  int EOFCode;                 /* EOF code (ditto) */
-  int code_counter;            /* counts output symbols */
+  /* LZW string construction */
+  code_int waiting_code;       /* symbol not yet output; may be extendable */
+  boolean first_byte;          /* if TRUE, waiting_code is not valid */
+
+  /* State for LZW code assignment */
+  code_int ClearCode;          /* clear code (doesn't change) */
+  code_int EOFCode;            /* EOF code (ditto) */
+  code_int free_code;          /* first not-yet-used symbol code */
+
+  /* LZW hash table */
+  code_int *hash_code;         /* => hash table of symbol codes */
+  hash_entry FAR *hash_value;  /* => hash table of symbol values */
 
   /* GIF data packet construction buffer */
   int bytesinpkt;              /* # of bytes in current packet */
@@ -68,12 +110,9 @@ typedef struct {
 
 typedef gif_dest_struct * gif_dest_ptr;
 
-/* Largest value that will fit in N bits */
-#define MAXCODE(n_bits)        ((1 << (n_bits)) - 1)
-
 
 /*
- * Routines to package finished data bytes into GIF data blocks.
+ * Routines to package compressed data bytes into GIF data blocks.
  * A data block consists of a count byte (1..255) and that many data bytes.
  */
 
@@ -102,7 +141,7 @@ flush_packet (gif_dest_ptr dinfo)
 /* Routine to convert variable-width codes into a byte stream */
 
 LOCAL(void)
-output (gif_dest_ptr dinfo, int code)
+output (gif_dest_ptr dinfo, code_int code)
 /* Emit a code of n_bits bits */
 /* Uses cur_accum and cur_bits to reblock into 8-bit bytes */
 {
@@ -114,67 +153,123 @@ output (gif_dest_ptr dinfo, int code)
     dinfo->cur_accum >>= 8;
     dinfo->cur_bits -= 8;
   }
+
+  /*
+   * If the next entry is going to be too big for the code size,
+   * then increase it, if possible.  We do this here to ensure
+   * that it's done in sync with the decoder's codesize increases.
+   */
+  if (dinfo->free_code > dinfo->maxcode) {
+    dinfo->n_bits++;
+    if (dinfo->n_bits == MAX_LZW_BITS)
+      dinfo->maxcode = LZW_TABLE_SIZE; /* free_code will never exceed this */
+    else
+      dinfo->maxcode = MAXCODE(dinfo->n_bits);
+  }
 }
 
 
-/* The pseudo-compression algorithm.
- *
- * In this module we simply output each pixel value as a separate symbol;
- * thus, no compression occurs.  In fact, there is expansion of one bit per
- * pixel, because we use a symbol width one bit wider than the pixel width.
- *
- * GIF ordinarily uses variable-width symbols, and the decoder will expect
- * to ratchet up the symbol width after a fixed number of symbols.
- * To simplify the logic and keep the expansion penalty down, we emit a
- * GIF Clear code to reset the decoder just before the width would ratchet up.
- * Thus, all the symbols in the output file will have the same bit width.
- * Note that emitting the Clear codes at the right times is a mere matter of
- * counting output symbols and is in no way dependent on the LZW patent.
- *
- * With a small basic pixel width (low color count), Clear codes will be
- * needed very frequently, causing the file to expand even more.  So this
- * simplistic approach wouldn't work too well on bilevel images, for example.
- * But for output of JPEG conversions the pixel width will usually be 8 bits
- * (129 to 256 colors), so the overhead added by Clear symbols is only about
- * one symbol in every 256.
- */
+/* The LZW algorithm proper */
+
+
+LOCAL(void)
+clear_hash (gif_dest_ptr dinfo)
+/* Fill the hash table with empty entries */
+{
+  /* It's sufficient to zero hash_code[] */
+  MEMZERO(dinfo->hash_code, HSIZE * SIZEOF(code_int));
+}
+
+
+LOCAL(void)
+clear_block (gif_dest_ptr dinfo)
+/* Reset compressor and issue a Clear code */
+{
+  clear_hash(dinfo);                   /* delete all the symbols */
+  dinfo->free_code = dinfo->ClearCode + 2;
+  output(dinfo, dinfo->ClearCode);     /* inform decoder */
+  dinfo->n_bits = dinfo->init_bits;    /* reset code size */
+  dinfo->maxcode = MAXCODE(dinfo->n_bits);
+}
+
 
 LOCAL(void)
 compress_init (gif_dest_ptr dinfo, int i_bits)
-/* Initialize pseudo-compressor */
+/* Initialize LZW compressor */
 {
   /* init all the state variables */
-  dinfo->n_bits = i_bits;
+  dinfo->n_bits = dinfo->init_bits = i_bits;
   dinfo->maxcode = MAXCODE(dinfo->n_bits);
-  dinfo->ClearCode = (1 << (i_bits - 1));
+  dinfo->ClearCode = ((code_int) 1 << (i_bits - 1));
   dinfo->EOFCode = dinfo->ClearCode + 1;
-  dinfo->code_counter = dinfo->ClearCode + 2;
+  dinfo->free_code = dinfo->ClearCode + 2;
+  dinfo->first_byte = TRUE;    /* no waiting symbol yet */
   /* init output buffering vars */
   dinfo->bytesinpkt = 0;
   dinfo->cur_accum = 0;
   dinfo->cur_bits = 0;
+  /* clear hash table */
+  clear_hash(dinfo);
   /* GIF specifies an initial Clear code */
   output(dinfo, dinfo->ClearCode);
 }
 
 
 LOCAL(void)
-compress_pixel (gif_dest_ptr dinfo, int c)
-/* Accept and "compress" one pixel value.
- * The given value must be less than n_bits wide.
- */
+compress_byte (gif_dest_ptr dinfo, int c)
+/* Accept and compress one 8-bit byte */
 {
-  /* Output the given pixel value as a symbol. */
-  output(dinfo, c);
-  /* Issue Clear codes often enough to keep the reader from ratcheting up
-   * its symbol size.
+  register hash_int i;
+  register hash_int disp;
+  register hash_entry probe_value;
+
+  if (dinfo->first_byte) {     /* need to initialize waiting_code */
+    dinfo->waiting_code = c;
+    dinfo->first_byte = FALSE;
+    return;
+  }
+
+  /* Probe hash table to see if a symbol exists for
+   * waiting_code followed by c.
+   * If so, replace waiting_code by that symbol and return.
    */
-  if (dinfo->code_counter < dinfo->maxcode) {
-    dinfo->code_counter++;
-  } else {
-    output(dinfo, dinfo->ClearCode);
-    dinfo->code_counter = dinfo->ClearCode + 2;        /* reset the counter */
+  i = ((hash_int) c << (MAX_LZW_BITS-8)) + dinfo->waiting_code;
+  /* i is less than twice 2**MAX_LZW_BITS, therefore less than twice HSIZE */
+  if (i >= HSIZE)
+    i -= HSIZE;
+
+  probe_value = HASH_ENTRY(dinfo->waiting_code, c);
+  
+  if (dinfo->hash_code[i] != 0) { /* is first probed slot empty? */
+    if (dinfo->hash_value[i] == probe_value) {
+      dinfo->waiting_code = dinfo->hash_code[i];
+      return;
+    }
+    if (i == 0)                        /* secondary hash (after G. Knott) */
+      disp = 1;
+    else
+      disp = HSIZE - i;
+    for (;;) {
+      i -= disp;
+      if (i < 0)
+       i += HSIZE;
+      if (dinfo->hash_code[i] == 0)
+       break;                  /* hit empty slot */
+      if (dinfo->hash_value[i] == probe_value) {
+       dinfo->waiting_code = dinfo->hash_code[i];
+       return;
+      }
+    }
   }
+
+  /* here when hashtable[i] is an empty slot; desired symbol not in table */
+  output(dinfo, dinfo->waiting_code);
+  if (dinfo->free_code < LZW_TABLE_SIZE) {
+    dinfo->hash_code[i] = dinfo->free_code++; /* add symbol to hashtable */
+    dinfo->hash_value[i] = probe_value;
+  } else
+    clear_block(dinfo);
+  dinfo->waiting_code = c;
 }
 
 
@@ -182,6 +277,9 @@ LOCAL(void)
 compress_term (gif_dest_ptr dinfo)
 /* Clean up at end */
 {
+  /* Flush out the buffered code */
+  if (! dinfo->first_byte)
+    output(dinfo, dinfo->waiting_code);
   /* Send an EOF code */
   output(dinfo, dinfo->EOFCode);
   /* Flush the bit-packing buffer */
@@ -289,7 +387,7 @@ emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap)
   /* Write Initial Code Size byte */
   putc(InitCodeSize, dinfo->pub.output_file);
 
-  /* Initialize for "compression" of image data */
+  /* Initialize for LZW compression of image data */
   compress_init(dinfo, InitCodeSize+1);
 }
 
@@ -325,7 +423,7 @@ put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
 
   ptr = dest->pub.buffer[0];
   for (col = cinfo->output_width; col > 0; col--) {
-    compress_pixel(dest, GETJSAMPLE(*ptr++));
+    compress_byte(dest, GETJSAMPLE(*ptr++));
   }
 }
 
@@ -339,7 +437,7 @@ finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo)
 {
   gif_dest_ptr dest = (gif_dest_ptr) dinfo;
 
-  /* Flush "compression" mechanism */
+  /* Flush LZW mechanism */
   compress_term(dest);
   /* Write a zero-length data block to end the series */
   putc(0, dest->pub.output_file);
@@ -393,6 +491,14 @@ jinit_write_gif (j_decompress_ptr cinfo)
     ((j_common_ptr) cinfo, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION) 1);
   dest->pub.buffer_height = 1;
 
+  /* Allocate space for hash table */
+  dest->hash_code = (code_int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                               HSIZE * SIZEOF(code_int));
+  dest->hash_value = (hash_entry FAR *)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                               HSIZE * SIZEOF(hash_entry));
+
   return (djpeg_dest_ptr) dest;
 }