From a2e6a9dd47eb10c701a42a16f305ded1a02cd886 Mon Sep 17 00:00:00 2001 From: MIYASAKA Masaru Date: Sat, 4 Feb 2006 00:00:00 +0000 Subject: [PATCH] IJG R6b with x86SIMD V1.02 Independent JPEG Group's JPEG software release 6b with x86 SIMD extension for IJG JPEG library version 1.02 --- aclocal.m4 | 3655 +++++++++++++++++ altui/README.alt | 71 + altui/cjpeg.c | 813 ++++ altui/djpeg.c | 836 ++++ altui/usage.alt | 62 + cjpeg.c | 62 + ckconfig.c | 22 + config.guess | 1458 +++++-- config.sub | 876 +++- config.ver | 44 + configure | 5449 ++++++++++++++++++++++++- configure.in | 634 +++ djpeg.c | 68 + install-sh | 495 ++- jccolmmx.asm | 513 +++ jccolor.c | 53 +- jccolss2.asm | 541 +++ jcdctmgr.c | 366 +- jcolsamp.h | 143 + jcolsamp.inc | 156 + jcomapi.c | 58 + jconfig.bc5 | 48 + jconfig.cfg | 12 + jconfig.dj | 9 + jconfig.linux | 44 + jconfig.mgw | 48 + jconfig.vc | 13 +- jcqnt3dn.asm | 240 ++ jcqntflt.asm | 202 + jcqntint.asm | 243 ++ jcqntmmx.asm | 254 ++ jcqnts2f.asm | 178 + jcqnts2i.asm | 216 + jcqntsse.asm | 218 + jcsammmx.asm | 328 ++ jcsample.c | 53 +- jcsamss2.asm | 355 ++ jdcoefct.c | 169 +- jdcolmmx.asm | 438 ++ jdcolor.c | 70 +- jdcolss2.asm | 536 +++ jdct.h | 216 +- jdct.inc | 125 + jddctmgr.c | 162 +- jdhuff.c | 268 +- jdhuff.h | 85 +- jdmerge.c | 107 +- jdmermmx.asm | 981 +++++ jdmerss2.asm | 1272 ++++++ jdphuff.c | 291 +- jdsammmx.asm | 893 ++++ jdsample.c | 208 +- jdsamss2.asm | 883 ++++ jf3dnflt.asm | 327 ++ jfdctflt.asm | 288 ++ jfdctfst.asm | 303 ++ jfdctint.asm | 342 ++ jfmmxfst.asm | 404 ++ jfmmxint.asm | 629 +++ jfss2fst.asm | 411 ++ jfss2int.asm | 641 +++ jfsseflt.asm | 383 ++ ji3dnflt.asm | 462 +++ jidctflt.asm | 473 +++ jidctfst.asm | 464 +++ jidctint.asm | 524 +++ jidctred.asm | 688 ++++ jimmxfst.asm | 510 +++ jimmxint.asm | 862 ++++ jimmxred.asm | 719 ++++ jiss2flt.asm | 508 +++ jiss2fst.asm | 512 +++ jiss2int.asm | 869 ++++ jiss2red.asm | 607 +++ jisseflt.asm | 582 +++ jmemmgr.c | 137 +- jmorecfg.h | 126 +- jpegdll.def | 73 + jpegdll.rc | 57 + 
jpegint.h | 26 + jpeglib.h | 61 + jsimdcpu.asm | 112 + jsimddjg.asm | 130 + jsimdext.inc | 347 ++ jsimdgcc.c | 95 + jsimdw32.asm | 121 + libjpeg.spec | 234 ++ ltconfig | 1512 ------- ltmain.sh | 5128 +++++++++++++++++------ makecfg.c | 300 ++ makefile.ansi | 107 +- makefile.bc5 | 320 ++ makefile.cfg | 172 +- makefile.dj | 140 +- makefile.linux | 449 ++ makefile.mgw | 298 ++ makefile.mgwdll | 310 ++ makefile.unix | 107 +- makefile.vc | 160 +- makefile.vcdll | 311 ++ nasm_lt.sh | 57 + rdbmp.c | 17 +- rdgif.c | 665 ++- simd_README.ja.txt | 145 + simd_cdjpeg.ja.txt | 75 + simd_changes.ja.txt | 24 + simd_filelist.ja.txt | 261 ++ simd_install.ja.txt | 436 ++ simd_internal.ja.txt | 293 ++ jconfig.bcc => unused/jconfig.bcc | 0 jconfig.mac => unused/jconfig.mac | 0 jconfig.manx => unused/jconfig.manx | 0 jconfig.mc6 => unused/jconfig.mc6 | 0 jconfig.sas => unused/jconfig.sas | 0 jconfig.st => unused/jconfig.st | 0 jconfig.vms => unused/jconfig.vms | 0 jconfig.wat => unused/jconfig.wat | 0 jfdctflt.c => unused/jfdctflt.c | 0 jfdctfst.c => unused/jfdctfst.c | 0 jfdctint.c => unused/jfdctint.c | 0 jidctflt.c => unused/jidctflt.c | 0 jidctfst.c => unused/jidctfst.c | 0 jidctint.c => unused/jidctint.c | 0 jidctred.c => unused/jidctred.c | 0 jmemdos.c => unused/jmemdos.c | 0 jmemdosa.asm => unused/jmemdosa.asm | 0 jmemmac.c => unused/jmemmac.c | 0 makcjpeg.st => unused/makcjpeg.st | 0 makdjpeg.st => unused/makdjpeg.st | 0 makeapps.ds => unused/makeapps.ds | 0 makefile.bcc => unused/makefile.bcc | 0 makefile.manx => unused/makefile.manx | 0 makefile.mc6 => unused/makefile.mc6 | 0 makefile.mms => unused/makefile.mms | 0 makefile.sas => unused/makefile.sas | 0 makefile.vms => unused/makefile.vms | 0 makefile.wat => unused/makefile.wat | 0 makelib.ds => unused/makelib.ds | 0 makeproj.mac => unused/makeproj.mac | 0 makljpeg.st => unused/makljpeg.st | 0 maktjpeg.st => unused/maktjpeg.st | 0 makvms.opt => unused/makvms.opt | 0 unused/rdgif.c | 38 + unused/wrgif.c | 399 ++ 
vc6proj/apptest.dsp | 242 ++ vc6proj/cjpeg.dsp | 164 + vc6proj/djpeg.dsp | 164 + vc6proj/jconfig.h | 48 + vc6proj/jpegtran.dsp | 156 + vc6proj/libjpeg.dsp | 1751 ++++++++ vc6proj/libjpeg.dsw | 134 + vc6proj/makecfg.dsp | 142 + vc6proj/rdjpgcom.dsp | 112 + vc6proj/wrjpgcom.dsp | 112 + wrbmp.c | 17 +- wrgif.c | 220 +- 156 files changed, 49144 insertions(+), 4409 deletions(-) create mode 100644 aclocal.m4 create mode 100644 altui/README.alt create mode 100644 altui/cjpeg.c create mode 100644 altui/djpeg.c create mode 100644 altui/usage.alt mode change 100755 => 100644 config.guess mode change 100755 => 100644 config.sub create mode 100644 config.ver create mode 100644 configure.in create mode 100644 jccolmmx.asm create mode 100644 jccolss2.asm create mode 100644 jcolsamp.h create mode 100644 jcolsamp.inc create mode 100644 jconfig.bc5 create mode 100644 jconfig.linux create mode 100644 jconfig.mgw create mode 100644 jcqnt3dn.asm create mode 100644 jcqntflt.asm create mode 100644 jcqntint.asm create mode 100644 jcqntmmx.asm create mode 100644 jcqnts2f.asm create mode 100644 jcqnts2i.asm create mode 100644 jcqntsse.asm create mode 100644 jcsammmx.asm create mode 100644 jcsamss2.asm create mode 100644 jdcolmmx.asm create mode 100644 jdcolss2.asm create mode 100644 jdct.inc create mode 100644 jdmermmx.asm create mode 100644 jdmerss2.asm create mode 100644 jdsammmx.asm create mode 100644 jdsamss2.asm create mode 100644 jf3dnflt.asm create mode 100644 jfdctflt.asm create mode 100644 jfdctfst.asm create mode 100644 jfdctint.asm create mode 100644 jfmmxfst.asm create mode 100644 jfmmxint.asm create mode 100644 jfss2fst.asm create mode 100644 jfss2int.asm create mode 100644 jfsseflt.asm create mode 100644 ji3dnflt.asm create mode 100644 jidctflt.asm create mode 100644 jidctfst.asm create mode 100644 jidctint.asm create mode 100644 jidctred.asm create mode 100644 jimmxfst.asm create mode 100644 jimmxint.asm create mode 100644 jimmxred.asm create mode 100644 jiss2flt.asm create 
mode 100644 jiss2fst.asm create mode 100644 jiss2int.asm create mode 100644 jiss2red.asm create mode 100644 jisseflt.asm create mode 100644 jpegdll.def create mode 100644 jpegdll.rc create mode 100644 jsimdcpu.asm create mode 100644 jsimddjg.asm create mode 100644 jsimdext.inc create mode 100644 jsimdgcc.c create mode 100644 jsimdw32.asm create mode 100644 libjpeg.spec delete mode 100755 ltconfig create mode 100644 makecfg.c create mode 100644 makefile.bc5 create mode 100644 makefile.linux create mode 100644 makefile.mgw create mode 100644 makefile.mgwdll create mode 100644 makefile.vcdll create mode 100644 nasm_lt.sh create mode 100644 simd_README.ja.txt create mode 100644 simd_cdjpeg.ja.txt create mode 100644 simd_changes.ja.txt create mode 100644 simd_filelist.ja.txt create mode 100644 simd_install.ja.txt create mode 100644 simd_internal.ja.txt rename jconfig.bcc => unused/jconfig.bcc (100%) rename jconfig.mac => unused/jconfig.mac (100%) rename jconfig.manx => unused/jconfig.manx (100%) rename jconfig.mc6 => unused/jconfig.mc6 (100%) rename jconfig.sas => unused/jconfig.sas (100%) rename jconfig.st => unused/jconfig.st (100%) rename jconfig.vms => unused/jconfig.vms (100%) rename jconfig.wat => unused/jconfig.wat (100%) rename jfdctflt.c => unused/jfdctflt.c (100%) rename jfdctfst.c => unused/jfdctfst.c (100%) rename jfdctint.c => unused/jfdctint.c (100%) rename jidctflt.c => unused/jidctflt.c (100%) rename jidctfst.c => unused/jidctfst.c (100%) rename jidctint.c => unused/jidctint.c (100%) rename jidctred.c => unused/jidctred.c (100%) rename jmemdos.c => unused/jmemdos.c (100%) rename jmemdosa.asm => unused/jmemdosa.asm (100%) rename jmemmac.c => unused/jmemmac.c (100%) rename makcjpeg.st => unused/makcjpeg.st (100%) rename makdjpeg.st => unused/makdjpeg.st (100%) rename makeapps.ds => unused/makeapps.ds (100%) rename makefile.bcc => unused/makefile.bcc (100%) rename makefile.manx => unused/makefile.manx (100%) rename makefile.mc6 => unused/makefile.mc6 (100%) 
rename makefile.mms => unused/makefile.mms (100%) rename makefile.sas => unused/makefile.sas (100%) rename makefile.vms => unused/makefile.vms (100%) rename makefile.wat => unused/makefile.wat (100%) rename makelib.ds => unused/makelib.ds (100%) rename makeproj.mac => unused/makeproj.mac (100%) rename makljpeg.st => unused/makljpeg.st (100%) rename maktjpeg.st => unused/maktjpeg.st (100%) rename makvms.opt => unused/makvms.opt (100%) create mode 100644 unused/rdgif.c create mode 100644 unused/wrgif.c create mode 100644 vc6proj/apptest.dsp create mode 100644 vc6proj/cjpeg.dsp create mode 100644 vc6proj/djpeg.dsp create mode 100644 vc6proj/jconfig.h create mode 100644 vc6proj/jpegtran.dsp create mode 100644 vc6proj/libjpeg.dsp create mode 100644 vc6proj/libjpeg.dsw create mode 100644 vc6proj/makecfg.dsp create mode 100644 vc6proj/rdjpgcom.dsp create mode 100644 vc6proj/wrjpgcom.dsp diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 0000000..54e986b --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,3655 @@ +# generated automatically by aclocal 1.8.5 -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 +# Free Software Foundation, Inc. +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +# libtool.m4 - Configure libtool for the host system. -*-Shell-script-*- + +# serial 46 AC_PROG_LIBTOOL + +AC_DEFUN([AC_PROG_LIBTOOL], +[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh" + +# Always use our own libtool. 
+LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +# Prevent multiple expansion +define([AC_PROG_LIBTOOL], []) +]) + +AC_DEFUN([AC_LIBTOOL_SETUP], +[AC_PREREQ(2.13)dnl +AC_REQUIRE([AC_ENABLE_SHARED])dnl +AC_REQUIRE([AC_ENABLE_STATIC])dnl +AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl +AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_PROG_LD])dnl +AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl +AC_REQUIRE([AC_PROG_NM])dnl +AC_REQUIRE([LT_AC_PROG_SED])dnl + +AC_REQUIRE([AC_PROG_LN_S])dnl +AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl +AC_REQUIRE([AC_OBJEXT])dnl +AC_REQUIRE([AC_EXEEXT])dnl +dnl + +_LT_AC_PROG_ECHO_BACKSLASH +# Only perform the check for file, if the check method requires it +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + AC_PATH_MAGIC + fi + ;; +esac + +AC_CHECK_TOOL(RANLIB, ranlib, :) +AC_CHECK_TOOL(STRIP, strip, :) + +ifdef([AC_PROVIDE_AC_LIBTOOL_DLOPEN], enable_dlopen=yes, enable_dlopen=no) +ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL], +enable_win32_dll=yes, enable_win32_dll=no) + +AC_ARG_ENABLE(libtool-lock, + [ --disable-libtool-lock avoid locking (might break parallel builds)]) +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +*-*-irix6*) + # Find out which ABI we are using. + echo '[#]line __oline__ "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. 
+ SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_SAVE + AC_LANG_C + AC_TRY_LINK([],[],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_RESTORE]) + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; + +ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL], +[*-*-cygwin* | *-*-mingw* | *-*-pw32*) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + + # recent cygwin and mingw systems supply a stub DllMain which the user + # can override, but on older systems we have to supply one + AC_CACHE_CHECK([if libtool should supply DllMain function], lt_cv_need_dllmain, + [AC_TRY_LINK([], + [extern int __attribute__((__stdcall__)) DllMain(void*, int, void*); + DllMain (0, 0, 0);], + [lt_cv_need_dllmain=no],[lt_cv_need_dllmain=yes])]) + + case $host/$CC in + *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*) + # old mingw systems require "-dll" to link a DLL, while more recent ones + # require "-mdll" + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -mdll" + AC_CACHE_CHECK([how to link DLLs], lt_cv_cc_dll_switch, + [AC_TRY_LINK([], [], [lt_cv_cc_dll_switch=-mdll],[lt_cv_cc_dll_switch=-dll])]) + CFLAGS="$SAVE_CFLAGS" ;; + *-*-cygwin* | *-*-pw32*) + # cygwin systems need to pass --dll to the linker, and not link + # crt.o which will require a WinMain@16 definition. 
+ lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;; + esac + ;; + ]) +esac + +_LT_AC_LTCONFIG_HACK + +]) + +# AC_LIBTOOL_HEADER_ASSERT +# ------------------------ +AC_DEFUN([AC_LIBTOOL_HEADER_ASSERT], +[AC_CACHE_CHECK([whether $CC supports assert without backlinking], + [lt_cv_func_assert_works], + [case $host in + *-*-solaris*) + if test "$GCC" = yes && test "$with_gnu_ld" != yes; then + case `$CC --version 2>/dev/null` in + [[12]].*) lt_cv_func_assert_works=no ;; + *) lt_cv_func_assert_works=yes ;; + esac + fi + ;; + esac]) + +if test "x$lt_cv_func_assert_works" = xyes; then + AC_CHECK_HEADERS(assert.h) +fi +])# AC_LIBTOOL_HEADER_ASSERT + +# _LT_AC_CHECK_DLFCN +# -------------------- +AC_DEFUN([_LT_AC_CHECK_DLFCN], +[AC_CHECK_HEADERS(dlfcn.h) +])# _LT_AC_CHECK_DLFCN + +# AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE +# --------------------------------- +AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], +[AC_REQUIRE([AC_CANONICAL_HOST]) +AC_REQUIRE([AC_PROG_NM]) +AC_REQUIRE([AC_OBJEXT]) +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [dnl + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Transform the above into a raw symbol and a C symbol. +symxfrm='\1 \2\3 \3' + +# Transform an extracted symbol line into a proper C declaration +lt_cv_global_symbol_to_cdecl="sed -n -e 's/^. 
.* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | pw32*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) # Its linker distinguishes data from code symbols + lt_cv_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris* | sysv5*) + symcode='[[BDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# Handle CRLF in mingw tool chain +opt_cr= +case $host_os in +mingw*) + opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then + symcode='[[ABCDGISTW]]' +fi + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Write the raw and C identifiers. +lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'" + + # Check to see that the pipe works correctly. + pipe_works=no + rm -f conftest* + cat > conftest.$ac_ext < $nlist) && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. 
+ if egrep ' nm_test_var$' "$nlist" >/dev/null; then + if egrep ' nm_test_func$' "$nlist" >/dev/null; then + cat < conftest.$ac_ext +#ifdef __cplusplus +extern "C" { +#endif + +EOF + # Now generate the symbol file. + eval "$lt_cv_global_symbol_to_cdecl"' < "$nlist" >> conftest.$ac_ext' + + cat <> conftest.$ac_ext +#if defined (__STDC__) && __STDC__ +# define lt_ptr void * +#else +# define lt_ptr char * +# define const +#endif + +/* The mapping between symbol names and symbols. */ +const struct { + const char *name; + lt_ptr address; +} +lt_preloaded_symbols[[]] = +{ +EOF + sed "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr) \&\2},/" < "$nlist" >> conftest.$ac_ext + cat <<\EOF >> conftest.$ac_ext + {0, (lt_ptr) 0} +}; + +#ifdef __cplusplus +} +#endif +EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$no_builtin_flag" + if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + else + echo "cannot find nm_test_func in $nlist" >&AC_FD_CC + fi + else + echo "cannot find nm_test_var in $nlist" >&AC_FD_CC + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AC_FD_CC + fi + else + echo "$progname: failed program was:" >&AC_FD_CC + cat conftest.$ac_ext >&5 + fi + rm -f conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. 
+ if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +global_symbol_pipe="$lt_cv_sys_global_symbol_pipe" +if test -z "$lt_cv_sys_global_symbol_pipe"; then + global_symbol_to_cdecl= + global_symbol_to_c_name_address= +else + global_symbol_to_cdecl="$lt_cv_global_symbol_to_cdecl" + global_symbol_to_c_name_address="$lt_cv_global_symbol_to_c_name_address" +fi +if test -z "$global_symbol_pipe$global_symbol_to_cdecl$global_symbol_to_c_name_address"; +then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi +]) # AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE + +# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR +# --------------------------------- +AC_DEFUN([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR], +[# Find the correct PATH separator. Usually this is `:', but +# DJGPP uses `;' like DOS. +if test "X${PATH_SEPARATOR+set}" != Xset; then + UNAME=${UNAME-`uname 2>/dev/null`} + case X$UNAME in + *-DOS) lt_cv_sys_path_separator=';' ;; + *) lt_cv_sys_path_separator=':' ;; + esac + PATH_SEPARATOR=$lt_cv_sys_path_separator +fi +])# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR + +# _LT_AC_PROG_ECHO_BACKSLASH +# -------------------------- +# Add some code to the start of the generated configure script which +# will find an echo command which doesn't interpret backslashes. +AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH], +[ifdef([AC_DIVERSION_NOTICE], [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)], + [AC_DIVERT_PUSH(NOTICE)]) +_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR + +# Check that we are running under the correct shell. +SHELL=${CONFIG_SHELL-/bin/sh} + +case X$ECHO in +X*--fallback-echo) + # Remove one level of quotation (which was required for Make). + ECHO=`echo "$ECHO" | sed 's,\\\\\[$]\\[$]0,'[$]0','` + ;; +esac + +echo=${ECHO-echo} +if test "X[$]1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. 
+ shift +elif test "X[$]1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then + # Yippee, $echo works! + : +else + # Restart under the correct shell. + exec $SHELL "[$]0" --no-reexec ${1+"[$]@"} +fi + +if test "X[$]1" = X--fallback-echo; then + # used as fallback echo + shift + cat </dev/null && + echo_test_string="`eval $cmd`" && + (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null + then + break + fi + done +fi + +if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + : +else + # The Solaris, AIX, and Digital Unix default echo programs unquote + # backslashes. This makes it impossible to quote backslashes using + # echo "$something" | sed 's/\\/\\\\/g' + # + # So, first we look for a working echo in the user's PATH. + + IFS="${IFS= }"; save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for dir in $PATH /usr/ucb; do + if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && + test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$dir/echo" + break + fi + done + IFS="$save_ifs" + + if test "X$echo" = Xecho; then + # We didn't find a better echo, so look for alternatives. + if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # This shell has a builtin print -r that does the trick. + echo='print -r' + elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && + test "X$CONFIG_SHELL" != X/bin/ksh; then + # If we have ksh, try running configure again with it. 
+ ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh} + export ORIGINAL_CONFIG_SHELL + CONFIG_SHELL=/bin/ksh + export CONFIG_SHELL + exec $CONFIG_SHELL "[$]0" --no-reexec ${1+"[$]@"} + else + # Try using printf. + echo='printf %s\n' + if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # Cool, printf works + : + elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL + export CONFIG_SHELL + SHELL="$CONFIG_SHELL" + export SHELL + echo="$CONFIG_SHELL [$]0 --fallback-echo" + elif echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$CONFIG_SHELL [$]0 --fallback-echo" + else + # maybe with a smaller string... + prev=: + + for cmd in 'echo test' 'sed 2q "[$]0"' 'sed 10q "[$]0"' 'sed 20q "[$]0"' 'sed 50q "[$]0"'; do + if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null + then + break + fi + prev="$cmd" + done + + if test "$prev" != 'sed 50q "[$]0"'; then + echo_test_string=`eval $prev` + export echo_test_string + exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "[$]0" ${1+"[$]@"} + else + # Oops. We lost completely, so just stick with echo. + echo=echo + fi + fi + fi + fi +fi +fi + +# Copy echo and quote the copy suitably for passing to libtool from +# the Makefile, instead of quoting the original, which is used later. 
+ECHO=$echo +if test "X$ECHO" = "X$CONFIG_SHELL [$]0 --fallback-echo"; then + ECHO="$CONFIG_SHELL \\\$\[$]0 --fallback-echo" +fi + +AC_SUBST(ECHO) +AC_DIVERT_POP +])# _LT_AC_PROG_ECHO_BACKSLASH + +# _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# ------------------------------------------------------------------ +AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF], +[if test "$cross_compiling" = yes; then : + [$4] +else + AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext < +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +#ifdef __cplusplus +extern "C" void exit (int); +#endif + +void fnord() { int i=42;} +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + /* dlclose (self); */ + } + + exit (status); +}] +EOF + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_AC_TRY_DLOPEN_SELF + +# AC_LIBTOOL_DLOPEN_SELF +# ------------------- +AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], +[if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + cygwin* | mingw* | pw32*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen="shl_load"], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen="dlopen"], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_AC_TRY_DLOPEN_SELF( + lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test "x$lt_cv_dlopen_self" = xyes; then + LDFLAGS="$LDFLAGS $link_static_flag" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + 
_LT_AC_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +])# AC_LIBTOOL_DLOPEN_SELF + +AC_DEFUN([_LT_AC_LTCONFIG_HACK], +[AC_REQUIRE([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])dnl +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='sed -e s/^X//' +sed_quote_subst='s/\([[\\"\\`$\\\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([[\\"\\`\\\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Constants: +rm="rm -f" + +# Global variables: +default_ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except M$VC, +# which needs '.lib'). 
+libext=a +ltmain="$ac_aux_dir/ltmain.sh" +ofile="$default_ofile" +with_gnu_ld="$lt_cv_prog_gnu_ld" +need_locks="$enable_libtool_lock" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$AR" && AR=ar +test -z "$AR_FLAGS" && AR_FLAGS=cru +test -z "$AS" && AS=as +test -z "$CC" && CC=cc +test -z "$DLLTOOL" && DLLTOOL=dlltool +test -z "$LD" && LD=ld +test -z "$LN_S" && LN_S="ln -s" +test -z "$MAGIC_CMD" && MAGIC_CMD=file +test -z "$NM" && NM=nm +test -z "$OBJDUMP" && OBJDUMP=objdump +test -z "$RANLIB" && RANLIB=: +test -z "$STRIP" && STRIP=: +test -z "$ac_objext" && ac_objext=o + +if test x"$host" != x"$build"; then + ac_tool_prefix=${host_alias}- +else + ac_tool_prefix= +fi + +# Transform linux* to *-*-linux-gnu*, to support old configure scripts. +case $host_os in +linux-gnu*) ;; +linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'` +esac + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="\$RANLIB -t \$oldlib~$old_postinstall_cmds" + ;; + *) + old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" +fi + +# Allow CC to be a program name with arguments. +set dummy $CC +compiler="[$]2" + +AC_MSG_CHECKING([for objdir]) +rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
+ objdir=_libs +fi +rmdir .libs 2>/dev/null +AC_MSG_RESULT($objdir) + + +AC_ARG_WITH(pic, +[ --with-pic try to use only PIC/non-PIC objects [default=use both]], +pic_mode="$withval", pic_mode=default) +test -z "$pic_mode" && pic_mode=default + +# We assume here that the value for lt_cv_prog_cc_pic will not be cached +# in isolation, and that seeing it set (from the cache) indicates that +# the associated values are set (in the cache) correctly too. +AC_MSG_CHECKING([for $compiler option to produce PIC]) +AC_CACHE_VAL(lt_cv_prog_cc_pic, +[ lt_cv_prog_cc_pic= + lt_cv_prog_cc_shlib= + lt_cv_prog_cc_wl= + lt_cv_prog_cc_static= + lt_cv_prog_cc_no_builtin= + lt_cv_prog_cc_can_build_shared=$can_build_shared + + if test "$GCC" = yes; then + lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static='-static' + + case $host_os in + aix*) + # Below there is a dirty hack to force normal static linking with -ldl + # The problem is because libdl dynamically linked with both libc and + # libC (AIX C++ library), which obviously doesn't included in libraries + # list by gcc. This cause undefined symbols with -static flags. + # This hack allows C programs to be linked with "-static -ldl", but + # not sure about C++ programs. + lt_cv_prog_cc_static="$lt_cv_prog_cc_static ${lt_cv_prog_cc_wl}-lC" + ;; + amigaos*) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + lt_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4' + ;; + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_cv_prog_cc_pic='-fno-common' + ;; + cygwin* | mingw* | pw32* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). 
+ lt_cv_prog_cc_pic='-DDLL_EXPORT' + ;; + sysv4*MP*) + if test -d /usr/nec; then + lt_cv_prog_cc_pic=-Kconform_pic + fi + ;; + *) + lt_cv_prog_cc_pic='-fPIC' + ;; + esac + else + # PORTME Check for PIC flags for the system compiler. + case $host_os in + aix3* | aix4* | aix5*) + lt_cv_prog_cc_wl='-Wl,' + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_cv_prog_cc_static='-Bstatic' + else + lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + hpux9* | hpux10* | hpux11*) + # Is there a better lt_cv_prog_cc_static that works with the bundled CC? + lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static="${lt_cv_prog_cc_wl}-a ${lt_cv_prog_cc_wl}archive" + lt_cv_prog_cc_pic='+Z' + ;; + + irix5* | irix6* | nonstopux*) + lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static='-non_shared' + # PIC (with -KPIC) is the default. + ;; + + cygwin* | mingw* | pw32* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_cv_prog_cc_pic='-DDLL_EXPORT' + ;; + + newsos6) + lt_cv_prog_cc_pic='-KPIC' + lt_cv_prog_cc_static='-Bstatic' + ;; + + osf3* | osf4* | osf5*) + # All OSF/1 code is PIC. 
+ lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static='-non_shared' + ;; + + sco3.2v5*) + lt_cv_prog_cc_pic='-Kpic' + lt_cv_prog_cc_static='-dn' + lt_cv_prog_cc_shlib='-belf' + ;; + + solaris*) + lt_cv_prog_cc_pic='-KPIC' + lt_cv_prog_cc_static='-Bstatic' + lt_cv_prog_cc_wl='-Wl,' + ;; + + sunos4*) + lt_cv_prog_cc_pic='-PIC' + lt_cv_prog_cc_static='-Bstatic' + lt_cv_prog_cc_wl='-Qoption ld ' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + lt_cv_prog_cc_pic='-KPIC' + lt_cv_prog_cc_static='-Bstatic' + lt_cv_prog_cc_wl='-Wl,' + ;; + + uts4*) + lt_cv_prog_cc_pic='-pic' + lt_cv_prog_cc_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + lt_cv_prog_cc_pic='-Kconform_pic' + lt_cv_prog_cc_static='-Bstatic' + fi + ;; + + *) + lt_cv_prog_cc_can_build_shared=no + ;; + esac + fi +]) +if test -z "$lt_cv_prog_cc_pic"; then + AC_MSG_RESULT([none]) +else + AC_MSG_RESULT([$lt_cv_prog_cc_pic]) + + # Check to make sure the pic_flag actually works. + AC_MSG_CHECKING([if $compiler PIC flag $lt_cv_prog_cc_pic works]) + AC_CACHE_VAL(lt_cv_prog_cc_pic_works, [dnl + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $lt_cv_prog_cc_pic -DPIC" + AC_TRY_COMPILE([], [], [dnl + case $host_os in + hpux9* | hpux10* | hpux11*) + # On HP-UX, both CC and GCC only warn that PIC is supported... then + # they create non-PIC objects. So, if there were any warnings, we + # assume that PIC is not supported. + if test -s conftest.err; then + lt_cv_prog_cc_pic_works=no + else + lt_cv_prog_cc_pic_works=yes + fi + ;; + *) + lt_cv_prog_cc_pic_works=yes + ;; + esac + ], [dnl + lt_cv_prog_cc_pic_works=no + ]) + CFLAGS="$save_CFLAGS" + ]) + + if test "X$lt_cv_prog_cc_pic_works" = Xno; then + lt_cv_prog_cc_pic= + lt_cv_prog_cc_can_build_shared=no + else + lt_cv_prog_cc_pic=" $lt_cv_prog_cc_pic" + fi + + AC_MSG_RESULT([$lt_cv_prog_cc_pic_works]) +fi + +# Check for any special shared library compilation flags. 
+if test -n "$lt_cv_prog_cc_shlib"; then + AC_MSG_WARN([\`$CC' requires \`$lt_cv_prog_cc_shlib' to build shared libraries]) + if echo "$old_CC $old_CFLAGS " | egrep -e "[[ ]]$lt_cv_prog_cc_shlib[[ ]]" >/dev/null; then : + else + AC_MSG_WARN([add \`$lt_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure]) + lt_cv_prog_cc_can_build_shared=no + fi +fi + +AC_MSG_CHECKING([if $compiler static flag $lt_cv_prog_cc_static works]) +AC_CACHE_VAL([lt_cv_prog_cc_static_works], [dnl + lt_cv_prog_cc_static_works=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $lt_cv_prog_cc_static" + AC_TRY_LINK([], [], [lt_cv_prog_cc_static_works=yes]) + LDFLAGS="$save_LDFLAGS" +]) + +# Belt *and* braces to stop my trousers falling down: +test "X$lt_cv_prog_cc_static_works" = Xno && lt_cv_prog_cc_static= +AC_MSG_RESULT([$lt_cv_prog_cc_static_works]) + +pic_flag="$lt_cv_prog_cc_pic" +special_shlib_compile_flags="$lt_cv_prog_cc_shlib" +wl="$lt_cv_prog_cc_wl" +link_static_flag="$lt_cv_prog_cc_static" +no_builtin_flag="$lt_cv_prog_cc_no_builtin" +can_build_shared="$lt_cv_prog_cc_can_build_shared" + + +# Check to see if options -o and -c are simultaneously supported by compiler +AC_MSG_CHECKING([if $compiler supports -c -o file.$ac_objext]) +AC_CACHE_VAL([lt_cv_compiler_c_o], [ +$rm -r conftest 2>/dev/null +mkdir conftest +cd conftest +echo "int some_variable = 0;" > conftest.$ac_ext +mkdir out +# According to Tom Tromey, Ian Lance Taylor reported there are C compilers +# that will create temporary files in the current directory regardless of +# the output directory. Thus, making CWD read-only will cause this test +# to fail, enabling locking or at least warning the user not to do parallel +# builds. +chmod -w . 
+save_CFLAGS="$CFLAGS" +CFLAGS="$CFLAGS -o out/conftest2.$ac_objext" +compiler_c_o=no +if { (eval echo configure:__oline__: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.$ac_objext; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s out/conftest.err; then + lt_cv_compiler_c_o=no + else + lt_cv_compiler_c_o=yes + fi +else + # Append any errors to the config.log. + cat out/conftest.err 1>&AC_FD_CC + lt_cv_compiler_c_o=no +fi +CFLAGS="$save_CFLAGS" +chmod u+w . +$rm conftest* out/* +rmdir out +cd .. +rmdir conftest +$rm -r conftest 2>/dev/null +]) +compiler_c_o=$lt_cv_compiler_c_o +AC_MSG_RESULT([$compiler_c_o]) + +if test x"$compiler_c_o" = x"yes"; then + # Check to see if we can write to a .lo + AC_MSG_CHECKING([if $compiler supports -c -o file.lo]) + AC_CACHE_VAL([lt_cv_compiler_o_lo], [ + lt_cv_compiler_o_lo=no + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -c -o conftest.lo" + save_objext="$ac_objext" + ac_objext=lo + AC_TRY_COMPILE([], [int some_variable = 0;], [dnl + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + lt_cv_compiler_o_lo=no + else + lt_cv_compiler_o_lo=yes + fi + ]) + ac_objext="$save_objext" + CFLAGS="$save_CFLAGS" + ]) + compiler_o_lo=$lt_cv_compiler_o_lo + AC_MSG_RESULT([$compiler_o_lo]) +else + compiler_o_lo=no +fi + +# Check to see if we can do hard links to lock some files if needed +hard_links="nottested" +if test "$compiler_c_o" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $rm conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test 
"$hard_links" = no; then + AC_MSG_WARN([\`$CC' does not support \`-c -o', so \`make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi + +if test "$GCC" = yes; then + # Check to see if options -fno-rtti -fno-exceptions are supported by compiler + AC_MSG_CHECKING([if $compiler supports -fno-rtti -fno-exceptions]) + echo "int some_variable = 0;" > conftest.$ac_ext + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.$ac_ext" + compiler_rtti_exceptions=no + AC_TRY_COMPILE([], [int some_variable = 0;], [dnl + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + compiler_rtti_exceptions=no + else + compiler_rtti_exceptions=yes + fi + ]) + CFLAGS="$save_CFLAGS" + AC_MSG_RESULT([$compiler_rtti_exceptions]) + + if test "$compiler_rtti_exceptions" = "yes"; then + no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions' + else + no_builtin_flag=' -fno-builtin' + fi +fi + +# See if the linker supports building shared libraries. 
+AC_MSG_CHECKING([whether the linker ($LD) supports shared libraries]) + +allow_undefined_flag= +no_undefined_flag= +need_lib_prefix=unknown +need_version=unknown +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +archive_cmds= +archive_expsym_cmds= +old_archive_from_new_cmds= +old_archive_from_expsyms_cmds= +export_dynamic_flag_spec= +whole_archive_flag_spec= +thread_safe_flag_spec= +hardcode_into_libs=no +hardcode_libdir_flag_spec= +hardcode_libdir_separator= +hardcode_direct=no +hardcode_minus_L=no +hardcode_shlibpath_var=unsupported +runpath_var= +link_all_deplibs=unknown +always_export_symbols=no +export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols' +# include_expsyms should be a list of space-separated symbols to be *always* +# included in the symbol list +include_expsyms= +# exclude_expsyms can be an egrep regular expression of symbols to exclude +# it will be wrapped by ` (' and `)$', so one must not match beginning or +# end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', +# as well as any symbol that contains `d'. +exclude_expsyms="_GLOBAL_OFFSET_TABLE_" +# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out +# platforms (ab)use it in PIC code, but their linkers get confused if +# the symbol is explicitly referenced. Since portable code cannot +# rely on this symbol name, it's probably fine to never include it in +# preloaded symbol tables. +extract_expsyms_cmds= + +case $host_os in +cygwin* | mingw* | pw32*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. 
+ if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; +openbsd*) + with_gnu_ld=no + ;; +esac + +ld_shlibs=yes +if test "$with_gnu_ld" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # See if GNU ld supports shared libraries. + case $host_os in + aix3* | aix4* | aix5*) + # On AIX, the GNU linker is very broken + # Note:Check GNU linker on AIX 5-IA64 when/if it becomes available. + ld_shlibs=no + cat <<EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.9.1, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to modify your PATH +*** so that a non-GNU linker is found, and then restart. + +EOF + ;; + + amigaos*) + archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + + # Samuel A. Falvo II reports + # that the semantics of dynamic libraries on AmigaOS, at least up + # to version 4, is to share data among multiple programs linked + # with the same dynamic library. Since this doesn't match the + # behavior of shared libraries on other platforms, we cannot use + # them. + ld_shlibs=no + ;; + + beos*) + if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation.
FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32*) + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec='-L$libdir' + allow_undefined_flag=unsupported + always_export_symbols=yes + + extract_expsyms_cmds='test -f $output_objdir/impgen.c || \ + sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //;s/^# *$//; p; }" -e d < $''0 > $output_objdir/impgen.c~ + test -f $output_objdir/impgen.exe || (cd $output_objdir && \ + if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \ + else $CC -o impgen impgen.c ; fi)~ + $output_objdir/impgen $dir/$soroot > $output_objdir/$soname-def' + + old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib' + + # cygwin and mingw dlls have different entry points and sets of symbols + # to exclude. + # FIXME: what about values for MSVC? + dll_entry=__cygwin_dll_entry@12 + dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~ + case $host_os in + mingw*) + # mingw values + dll_entry=_DllMainCRTStartup@12 + dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~ + ;; + esac + + # mingw and cygwin differ, and it's simplest to just exclude the union + # of the two symbol sets. 
+ dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12 + + # recent cygwin and mingw systems supply a stub DllMain which the user + # can override, but on older systems we have to supply one (in ltdll.c) + if test "x$lt_cv_need_dllmain" = "xyes"; then + ltdll_obj='$output_objdir/$soname-ltdll.'"$ac_objext " + ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $''0 > $output_objdir/$soname-ltdll.c~ + test -f $output_objdir/$soname-ltdll.$ac_objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~' + else + ltdll_obj= + ltdll_cmds= + fi + + # Extract the symbol export list from an `--export-all' def file, + # then regenerate the def file from the symbol export list, so that + # the compiled dll only exports the symbol export list. + # Be careful not to strip the DATA tag left by newer dlltools. + export_symbols_cmds="$ltdll_cmds"' + $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~ + sed -e "1,/EXPORTS/d" -e "s/ @ [[0-9]]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols' + + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is. + # If DATA tags from a recent dlltool are present, honour them!
+ archive_expsym_cmds='if test "x`sed 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname-def; + else + echo EXPORTS > $output_objdir/$soname-def; + _lt_hint=1; + cat $export_symbols | while read symbol; do + set dummy \$symbol; + case \[$]# in + 2) echo " \[$]2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;; + 4) echo " \[$]2 \[$]3 \[$]4 ; " >> $output_objdir/$soname-def; _lt_hint=`expr \$_lt_hint - 1`;; + *) echo " \[$]2 @ \$_lt_hint \[$]3 ; " >> $output_objdir/$soname-def;; + esac; + _lt_hint=`expr 1 + \$_lt_hint`; + done; + fi~ + '"$ltdll_cmds"' + $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~ + $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~ + $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~ + $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp --output-lib $output_objdir/$libname.dll.a~ + $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags' + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris* | sysv5*) + 
if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +EOF + elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test "$ld_shlibs" = yes; then + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir' + export_dynamic_flag_spec='${wl}--export-dynamic' + case $host_os in + cygwin* | mingw* | pw32*) + # dlltool doesn't understand --whole-archive et al. + whole_archive_flag_spec= + ;; + *) + # ancient GNU ld didn't support --whole-archive et al.
+ if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec= + fi + ;; + esac + fi +else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. + hardcode_minus_L=yes + if test "$GCC" = yes && test -z "$link_static_flag"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix4* | aix5*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + hardcode_direct=yes + archive_cmds='' + hardcode_libdir_separator=':' + if test "$GCC" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && \ + strings "$collect2name" | grep resolve_lib_name >/dev/null + then + # We have reworked collect2 + hardcode_direct=yes + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + esac + + shared_flag='-shared' + else + # not using gcc + if test "$host_cpu" = ia64; then + shared_flag='${wl}-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + # It seems that -bexpall can do strange things, so it is better to + # generate a list of symbols to export. + always_export_symbols=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag='-berok' + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:/usr/lib:/lib' + archive_expsym_cmds="\$CC"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols" + else + hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib' + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + allow_undefined_flag='${wl}-berok' + # This is a bit strange, but is similar to how AIX traditionally builds + # its shared libraries.
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname' + fi + fi + ;; + + amigaos*) + archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + # see comment about different semantics on the GNU ld section + ld_shlibs=no + ;; + + cygwin* | mingw* | pw32*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. + old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs' + fix_srcfile_path='`cygpath -w "$srcfile"`' + ;; + + darwin* | rhapsody*) + case "$host_os" in + rhapsody* | darwin1.[[012]]) + allow_undefined_flag='-undefined suppress' + ;; + *) # Darwin 1.3 on + allow_undefined_flag='-flat_namespace -undefined suppress' + ;; + esac + # FIXME: Relying on posixy $() will cause problems for + # cross-compilation, but unfortunately the echo tests do not + # yet detect zsh echo's removal of \ escapes. 
Also zsh mangles + # `"' quotes if we put them in here... so don't! + archive_cmds='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs && $CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib ${lib}-master.o $deplibs$linker_flags $(test .$module != .yes && echo -install_name $rpath/$soname $verstring)' + # We need to add '_' to the symbols in $export_symbols first + #archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols' + hardcode_direct=yes + hardcode_shlibpath_var=no + whole_archive_flag_spec='-all_load $convenience' + ;; + + freebsd1*) + ld_shlibs=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd*) + archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9* | hpux10* | hpux11*) + case $host_os in + hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;; + *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;; + esac + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_minus_L=yes # Not in the search PATH, but as the default + # location of the library. + export_dynamic_flag_spec='${wl}-E' + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + else + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='-rpath $libdir' + fi + hardcode_libdir_separator=: + link_all_deplibs=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + 
openbsd*) + hardcode_direct=yes + hardcode_shlibpath_var=no + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + else + case "$host_os" in + openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-R$libdir' + ;; + *) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + ;; + esac + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + fi + hardcode_libdir_flag_spec='${wl}-rpath 
${wl}$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "-exported_symbol " >> $lib.exp; echo "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~ + $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${objdir}/so_locations -o $lib~$rm $lib.exp' + + #Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + hardcode_libdir_separator=: + ;; + + sco3.2v5*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + export_dynamic_flag_spec='${wl}-Bexport' + ;; + + solaris*) + # gcc --version < 3.0 without binutils cannot create self contained + # shared libraries reliably, requiring libgcc.a to resolve some of + # the object symbols generated in some cases. Libraries that use + # assert need libgcc.a to resolve __eprintf, for example. 
Linking + # a copy of libgcc.a into every shared library to guarantee resolving + # such symbols causes other problems: According to Tim Van Holder, + # C++ libraries end up with a separate + # (to the application) exception stack for one thing. + no_undefined_flag=' -z defs' + if test "$GCC" = yes; then + case `$CC --version 2>/dev/null` in + [[12]].*) + cat <<EOF 1>&2 + +*** Warning: Releases of GCC earlier than version 3.0 cannot reliably +*** create self contained shared libraries on Solaris systems, without +*** introducing a dependency on libgcc.a. Therefore, libtool is disabling +*** -no-undefined support, which will at least allow you to build shared +*** libraries. However, you may find that when you link such libraries +*** into an application without using GCC, you have to manually add +*** \`gcc --print-libgcc-file-name\` to the link command. We urge you to +*** upgrade to a newer version of GCC. Another option is to rebuild your +*** current GCC to use the GNU linker from GNU binutils 2.9.1 or newer. + +EOF + no_undefined_flag= + ;; + esac + fi + # $CC -shared without GNU ld will not create a library from C++ + # object files and a static libstdc++, better avoid it by now + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) # Supported since Solaris 2.6 (maybe 2.5.1?)
+ whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv5*) + no_undefined_flag=' -z text' + # $CC -shared without GNU ld will not create a library from C++ + # object files and a static libstdc++, better avoid it by now + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' + hardcode_libdir_flag_spec= + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + ;; + + uts4*) + 
archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4.2uw2*) + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=no + hardcode_shlibpath_var=no + hardcode_runpath_var=yes + runpath_var=LD_RUN_PATH + ;; + + sysv5uw7* | unixware7*) + no_undefined_flag='${wl}-z ${wl}text' + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac +fi +AC_MSG_RESULT([$ld_shlibs]) +test "$ld_shlibs" = no && can_build_shared=no + +# Check hardcoding attributes. +AC_MSG_CHECKING([how to hardcode library paths into programs]) +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || \ + test -n "$runpath_var"; then + + # We can hardcode nonexistent directories. + if test "$hardcode_direct" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$hardcode_shlibpath_var" != no && + test "$hardcode_minus_L" != no; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexistent dirs. 
+ hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action=unsupported +fi +AC_MSG_RESULT([$hardcode_action]) + +striplib= +old_striplib= +AC_MSG_CHECKING([whether stripping libraries is possible]) +if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi + +reload_cmds='$LD$reload_flag -o $output$reload_objs' +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +# PORTME Fill in your ld.so characteristics +AC_MSG_CHECKING([dynamic linker characteristics]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + +case $host_os in +aix3*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}.so$major' + ;; + +aix4* | aix5*) + version_type=linux + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}.so$major ${libname}${release}.so$versuffix $libname.so' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can + # not hardcode correct soname into executable. Probably we can + # add versioning support to collect2, so additional links can + # be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}.so$major' + fi + shlibpath_var=LIBPATH + fi + hardcode_into_libs=yes + ;; + +amigaos*) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' + ;; + +beos*) + library_names_spec='${libname}.so' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi4*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + export_dynamic_flag_spec=-rdynamic + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32*) + version_type=windows + need_version=no + need_lib_prefix=no + case $GCC,$host_os in + yes,cygwin*) + library_names_spec='$libname.dll.a' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll' + postinstall_cmds='dlpath=`bash 2>&1 -c '\''. $dir/${file}i;echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog .libs/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`bash 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $rm \$dlpath' + ;; + yes,mingw*) + library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll' + sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | sed -e "s/^libraries://" -e "s/;/ /g" -e "s,=/,/,g"` + ;; + yes,pw32*) # dot class needs doubled brackets: m4 strips one level + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll' + ;; + *) + library_names_spec='${libname}`echo ${release} | sed -e 's/[[.]]/-/g'`${versuffix}.dll $libname.lib' + ;; + esac + dynamic_linker='Win32 ld.exe' + # FIXME: first we should search . and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + # FIXME: Relying on posixy $() will cause problems for + # cross-compilation, but unfortunately the echo tests do not + # yet detect zsh echo's removal of \ escapes. + library_names_spec='${libname}${release}${versuffix}.$(test .$module = .yes && echo so || echo dylib) ${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib) ${libname}.$(test .$module = .yes && echo so || echo dylib)' + soname_spec='${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib)' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + ;; + +freebsd1*) + dynamic_linker=no + ;; + +freebsd*) + objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout` + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2*) + shlibpath_overrides_runpath=yes + ;; + *) + shlibpath_overrides_runpath=no + 
hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + dynamic_linker="$host_os dld.sl" + version_type=sunos + need_lib_prefix=no + need_version=no + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl' + soname_spec='${libname}${release}.sl$major' + # HP-UX runs *really* slowly unless shared libraries are mode 555. + postinstall_cmds='chmod 555 $lib' + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) version_type=irix ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}.so$major' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so $libname.so' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + ;; + +# No shared lib support for Linux oldld, aout, or coff. 
+linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. +linux-gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + + # Find out which ABI we are using (multilib Linux x86_64 hack). 
+ libsuff= + case "$host_cpu" in + x86_64*|s390x*) + echo '[#]line __oline__ "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.$ac_objext` in + *64-bit*) + libsuff=64 + ;; + esac + fi + rm -rf conftest* + ;; + *) + ;; + esac + sys_lib_dlsearch_path_spec="/lib${libsuff} /usr/lib${libsuff}" + sys_lib_search_path_spec="/lib${libsuff} /usr/lib${libsuff} /usr/local/lib${libsuff}" + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so' + soname_spec='${libname}${release}.so$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +openbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case "$host_os" in + openbsd2.[[89]] | openbsd2.[[89]].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + ;; + +os2*) + libname_spec='$name' + need_lib_prefix=no + library_names_spec='$libname.dll $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | 
osf5*) + version_type=osf + need_version=no + soname_spec='${libname}${release}.so$major' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + hardcode_into_libs=yes + ;; + +sco3.2v5*) + version_type=osf + soname_spec='${libname}${release}.so$major' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + shlibpath_var=LD_LIBRARY_PATH + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + export_dynamic_flag_spec='${wl}-Blargedynsym' + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +uts4*) + version_type=linux + 
library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +dgux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so' + soname_spec='$libname.so.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test "$dynamic_linker" = no && can_build_shared=no + +# Report the final consequences. +AC_MSG_CHECKING([if libtool supports shared libraries]) +AC_MSG_RESULT([$can_build_shared]) + +AC_MSG_CHECKING([whether to build shared libraries]) +test "$can_build_shared" = "no" && enable_shared=no + +# On AIX, shared libraries and static libraries use the same namespace, and +# are all built from PIC. +case "$host_os" in +aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + +aix4* | aix5*) # aix5 added to match the aix4/aix5 arm of the dynamic linker case + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; +esac +AC_MSG_RESULT([$enable_shared]) + +AC_MSG_CHECKING([whether to build static libraries]) +# Make sure either enable_shared or enable_static is yes. 
+test "$enable_shared" = yes || enable_static=yes +AC_MSG_RESULT([$enable_static]) + +if test "$hardcode_action" = relink; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +AC_LIBTOOL_DLOPEN_SELF + +if test "$enable_shared" = yes && test "$GCC" = yes; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_MSG_CHECKING([whether -lc should be explicitly linked in]) + AC_CACHE_VAL([lt_cv_archive_cmds_need_lc], + [$rm conftest* + echo 'static int dummy;' > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile); then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_cv_prog_cc_wl + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if AC_TRY_EVAL(archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $rm conftest*]) + AC_MSG_RESULT([$lt_cv_archive_cmds_need_lc]) + ;; + esac +fi +need_lc=${lt_cv_archive_cmds_need_lc-yes} + +# The second clause should only fire when bootstrapping the +# libtool distribution, otherwise you forgot to ship ltmain.sh +# with your package, and you will get complaints that there are +# no rules to generate ltmain.sh. 
+if test -f "$ltmain"; then + : +else + # If there is no Makefile yet, we rely on a make rule to execute + # `config.status --recheck' to rerun these tests and create the + # libtool script then. + test -f Makefile && make "$ltmain" +fi + +if test -f "$ltmain"; then + trap "$rm \"${ofile}T\"; exit 1" 1 2 15 + $rm -f "${ofile}T" + + echo creating $ofile + + # Now quote all the things that may contain metacharacters while being + # careful not to overquote the AC_SUBSTed values. We take copies of the + # variables and quote the copies for generation of the libtool script. + for var in echo old_CC old_CFLAGS SED \ + AR AR_FLAGS CC LD LN_S NM SHELL \ + reload_flag reload_cmds wl \ + pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \ + thread_safe_flag_spec whole_archive_flag_spec libname_spec \ + library_names_spec soname_spec \ + RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \ + old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \ + postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \ + old_striplib striplib file_magic_cmd export_symbols_cmds \ + deplibs_check_method allow_undefined_flag no_undefined_flag \ + finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \ + global_symbol_to_c_name_address \ + hardcode_libdir_flag_spec hardcode_libdir_separator \ + sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ + compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do + + case $var in + reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \ + old_postinstall_cmds | old_postuninstall_cmds | \ + export_symbols_cmds | archive_cmds | archive_expsym_cmds | \ + extract_expsyms_cmds | old_archive_from_expsyms_cmds | \ + postinstall_cmds | postuninstall_cmds | \ + finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) + # Double-quote double-evaled strings. 
+ eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" + ;; + *) + eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" + ;; + esac + done + + cat <<__EOF__ > "${ofile}T" +#! $SHELL + +# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +# Copyright (C) 1996-2000 Free Software Foundation, Inc. +# Originally by Gordon Matzigkeit , 1996 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# A sed that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="${SED} -e s/^X//" + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi + +# ### BEGIN LIBTOOL CONFIG + +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$need_lc + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# The host system. +host_alias=$host_alias +host=$host + +# An echo program that does not interpret backslashes. +echo=$lt_echo + +# The archiver. +AR=$lt_AR +AR_FLAGS=$lt_AR_FLAGS + +# The default C compiler. +CC=$lt_CC + +# Is the compiler the GNU C compiler? +with_gcc=$GCC + +# The linker used to build libraries. +LD=$lt_LD + +# Whether we need hard or soft links. +LN_S=$lt_LN_S + +# A BSD-compatible nm program. +NM=$lt_NM + +# A symbol stripping program +STRIP=$STRIP + +# Used to examine libraries when file_magic_cmd begins "file" +MAGIC_CMD=$MAGIC_CMD + +# Used on cygwin: DLL creation program. +DLLTOOL="$DLLTOOL" + +# Used on cygwin: object dumper. +OBJDUMP="$OBJDUMP" + +# Used on cygwin: assembler. +AS="$AS" + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# How to pass a linker flag through the compiler. +wl=$lt_wl + +# Object file suffix (normally "o"). +objext="$ac_objext" + +# Old archive suffix (normally "a"). +libext="$libext" + +# Executable file suffix (normally ""). +exeext="$exeext" + +# Additional compiler flags for building library objects. +pic_flag=$lt_pic_flag +pic_mode=$pic_mode + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_compiler_c_o + +# Can we write directly to a .lo ? 
+compiler_o_lo=$lt_compiler_o_lo + +# Must we lock files when doing compilation ? +need_locks=$lt_need_locks + +# Do we need the lib prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_link_static_flag + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_no_builtin_flag + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Compiler flag to generate thread-safe objects. +thread_safe_flag_spec=$lt_thread_safe_flag_spec + +# Library versioning type. +version_type=$version_type + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME. +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Commands used to build and install an old-style archive. +RANLIB=$lt_RANLIB +old_archive_cmds=$lt_old_archive_cmds +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build and install a shared archive. 
+archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds +postinstall_cmds=$lt_postinstall_cmds +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands to strip libraries. +old_striplib=$lt_old_striplib +striplib=$lt_striplib + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method == file_magic. +file_magic_cmd=$lt_file_magic_cmd + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that forces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# Same as above, but a single script fragment to be evaled but not shown. +finish_eval=$lt_finish_eval + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_global_symbol_pipe + +# Transform the output of nm in a proper C declaration +global_symbol_to_cdecl=$lt_global_symbol_to_cdecl + +# Transform the output of nm in a C name address pair +global_symbol_to_c_name_address=$lt_global_symbol_to_c_name_address + +# This is the shared library runtime path variable. +runpath_var=$runpath_var + +# This is the shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist. +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single -rpath flag with a separated argument. 
+hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the +# resulting binary. +hardcode_direct=$hardcode_direct + +# Set to yes if using the -LDIR flag during linking hardcodes DIR into the +# resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into +# the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at relink time. +variables_saved_for_relink="$variables_saved_for_relink" + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Compile-time system search path for libraries +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Run-time system search path for libraries +sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec + +# Fix the shell variable \$srcfile for the compiler. +fix_srcfile_path="$fix_srcfile_path" + +# Set to yes if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# ### END LIBTOOL CONFIG + +__EOF__ + + case $host_os in + aix3*) + cat <<\EOF >> "${ofile}T" + +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. 
+if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +EOF + ;; + esac + + case $host_os in + cygwin* | mingw* | pw32* | os2*) + cat <<'EOF' >> "${ofile}T" + # This is a source program that is used to create dlls on Windows + # Don't remove nor modify the starting and closing comments +# /* ltdll.c starts here */ +# #define WIN32_LEAN_AND_MEAN +# #include +# #undef WIN32_LEAN_AND_MEAN +# #include +# +# #ifndef __CYGWIN__ +# # ifdef __CYGWIN32__ +# # define __CYGWIN__ __CYGWIN32__ +# # endif +# #endif +# +# #ifdef __cplusplus +# extern "C" { +# #endif +# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved); +# #ifdef __cplusplus +# } +# #endif +# +# #ifdef __CYGWIN__ +# #include +# DECLARE_CYGWIN_DLL( DllMain ); +# #endif +# HINSTANCE __hDllInstance_base; +# +# BOOL APIENTRY +# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved) +# { +# __hDllInstance_base = hInst; +# return TRUE; +# } +# /* ltdll.c ends here */ + # This is a source program that is used to create import libraries + # on Windows for dlls which lack them. Don't remove nor modify the + # starting and closing comments +# /* impgen.c starts here */ +# /* Copyright (C) 1999-2000 Free Software Foundation, Inc. +# +# This file is part of GNU libtool. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# */ +# +# #include /* for printf() */ +# #include /* for open(), lseek(), read() */ +# #include /* for O_RDONLY, O_BINARY */ +# #include /* for strdup() */ +# +# /* O_BINARY isn't required (or even defined sometimes) under Unix */ +# #ifndef O_BINARY +# #define O_BINARY 0 +# #endif +# +# static unsigned int +# pe_get16 (fd, offset) +# int fd; +# int offset; +# { +# unsigned char b[2]; +# lseek (fd, offset, SEEK_SET); +# read (fd, b, 2); +# return b[0] + (b[1]<<8); +# } +# +# static unsigned int +# pe_get32 (fd, offset) +# int fd; +# int offset; +# { +# unsigned char b[4]; +# lseek (fd, offset, SEEK_SET); +# read (fd, b, 4); +# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); +# } +# +# static unsigned int +# pe_as32 (ptr) +# void *ptr; +# { +# unsigned char *b = ptr; +# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); +# } +# +# int +# main (argc, argv) +# int argc; +# char *argv[]; +# { +# int dll; +# unsigned long pe_header_offset, opthdr_ofs, num_entries, i; +# unsigned long export_rva, export_size, nsections, secptr, expptr; +# unsigned long name_rvas, nexp; +# unsigned char *expdata, *erva; +# char *filename, *dll_name; +# +# filename = argv[1]; +# +# dll = open(filename, O_RDONLY|O_BINARY); +# if (dll < 1) +# return 1; +# +# dll_name = filename; +# +# for (i=0; filename[i]; i++) +# if (filename[i] == '/' || filename[i] == '\\' || filename[i] == ':') +# dll_name = filename + i +1; +# +# pe_header_offset = pe_get32 (dll, 0x3c); +# opthdr_ofs = pe_header_offset + 4 + 20; +# num_entries = pe_get32 (dll, opthdr_ofs + 92); +# +# if (num_entries < 1) /* no exports */ +# return 1; +# +# export_rva = pe_get32 (dll, opthdr_ofs + 96); +# export_size = pe_get32 (dll, opthdr_ofs + 100); +# nsections = pe_get16 (dll, pe_header_offset + 4 +2); +# secptr = 
(pe_header_offset + 4 + 20 + +# pe_get16 (dll, pe_header_offset + 4 + 16)); +# +# expptr = 0; +# for (i = 0; i < nsections; i++) +# { +# char sname[8]; +# unsigned long secptr1 = secptr + 40 * i; +# unsigned long vaddr = pe_get32 (dll, secptr1 + 12); +# unsigned long vsize = pe_get32 (dll, secptr1 + 16); +# unsigned long fptr = pe_get32 (dll, secptr1 + 20); +# lseek(dll, secptr1, SEEK_SET); +# read(dll, sname, 8); +# if (vaddr <= export_rva && vaddr+vsize > export_rva) +# { +# expptr = fptr + (export_rva - vaddr); +# if (export_rva + export_size > vaddr + vsize) +# export_size = vsize - (export_rva - vaddr); +# break; +# } +# } +# +# expdata = (unsigned char*)malloc(export_size); +# lseek (dll, expptr, SEEK_SET); +# read (dll, expdata, export_size); +# erva = expdata - export_rva; +# +# nexp = pe_as32 (expdata+24); +# name_rvas = pe_as32 (expdata+32); +# +# printf ("EXPORTS\n"); +# for (i = 0; i> "${ofile}T" || (rm -f "${ofile}T"; exit 1) + + mv -f "${ofile}T" "$ofile" || \ + (rm -f "$ofile" && cp "${ofile}T" "$ofile" && rm -f "${ofile}T") + chmod +x "$ofile" +fi + +])# _LT_AC_LTCONFIG_HACK + +# AC_LIBTOOL_DLOPEN - enable checks for dlopen support +AC_DEFUN([AC_LIBTOOL_DLOPEN], [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])]) + +# AC_LIBTOOL_WIN32_DLL - declare package support for building win32 dll's +AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [AC_BEFORE([$0], [AC_LIBTOOL_SETUP])]) + +# AC_ENABLE_SHARED - implement the --enable-shared flag +# Usage: AC_ENABLE_SHARED[(DEFAULT)] +# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to +# `yes'. +AC_DEFUN([AC_ENABLE_SHARED], +[define([AC_ENABLE_SHARED_DEFAULT], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE(shared, +changequote(<<, >>)dnl +<< --enable-shared[=PKGS] build shared libraries [default=>>AC_ENABLE_SHARED_DEFAULT], +changequote([, ])dnl +[p=${PACKAGE-default} +case $enableval in +yes) enable_shared=yes ;; +no) enable_shared=no ;; +*) + enable_shared=no + # Look at the argument we got. 
We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac], +enable_shared=AC_ENABLE_SHARED_DEFAULT)dnl +]) + +# AC_DISABLE_SHARED - set the default shared flag to --disable-shared +AC_DEFUN([AC_DISABLE_SHARED], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_SHARED(no)]) + +# AC_ENABLE_STATIC - implement the --enable-static flag +# Usage: AC_ENABLE_STATIC[(DEFAULT)] +# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to +# `yes'. +AC_DEFUN([AC_ENABLE_STATIC], +[define([AC_ENABLE_STATIC_DEFAULT], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE(static, +changequote(<<, >>)dnl +<< --enable-static[=PKGS] build static libraries [default=>>AC_ENABLE_STATIC_DEFAULT], +changequote([, ])dnl +[p=${PACKAGE-default} +case $enableval in +yes) enable_static=yes ;; +no) enable_static=no ;; +*) + enable_static=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac], +enable_static=AC_ENABLE_STATIC_DEFAULT)dnl +]) + +# AC_DISABLE_STATIC - set the default static flag to --disable-static +AC_DEFUN([AC_DISABLE_STATIC], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_STATIC(no)]) + + +# AC_ENABLE_FAST_INSTALL - implement the --enable-fast-install flag +# Usage: AC_ENABLE_FAST_INSTALL[(DEFAULT)] +# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to +# `yes'. 
+AC_DEFUN([AC_ENABLE_FAST_INSTALL], +[define([AC_ENABLE_FAST_INSTALL_DEFAULT], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE(fast-install, +changequote(<<, >>)dnl +<< --enable-fast-install[=PKGS] optimize for fast installation [default=>>AC_ENABLE_FAST_INSTALL_DEFAULT], +changequote([, ])dnl +[p=${PACKAGE-default} +case $enableval in +yes) enable_fast_install=yes ;; +no) enable_fast_install=no ;; +*) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac], +enable_fast_install=AC_ENABLE_FAST_INSTALL_DEFAULT)dnl +]) + +# AC_DISABLE_FAST_INSTALL - set the default to --disable-fast-install +AC_DEFUN([AC_DISABLE_FAST_INSTALL], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_FAST_INSTALL(no)]) + +# AC_LIBTOOL_PICMODE - implement the --with-pic flag +# Usage: AC_LIBTOOL_PICMODE[(MODE)] +# Where MODE is either `yes' or `no'. If omitted, it defaults to +# `both'. +AC_DEFUN([AC_LIBTOOL_PICMODE], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +pic_mode=ifelse($#,1,$1,default)]) + + +# AC_PATH_TOOL_PREFIX - find a file program which can recognise shared library +AC_DEFUN([AC_PATH_TOOL_PREFIX], +[AC_MSG_CHECKING([for $1]) +AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, +[case $MAGIC_CMD in + /*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; + ?:/*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a dos path. + ;; + *) + ac_save_MAGIC_CMD="$MAGIC_CMD" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="ifelse([$2], , $PATH, [$2])" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+      if test -f $ac_dir/$1; then
+        lt_cv_path_MAGIC_CMD="$ac_dir/$1"
+        if test -n "$file_magic_test_file"; then
+          case $deplibs_check_method in
+          "file_magic "*)
+            file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+            MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+            if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+              egrep "$file_magic_regex" > /dev/null; then
+              :
+            else
+dnl The warning is a here-document terminated by EOF and written to stderr.
+              cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+            fi ;;
+          esac
+        fi
+        break
+      fi
+    done
+    IFS="$ac_save_ifs"
+    MAGIC_CMD="$ac_save_MAGIC_CMD"
+    ;;
+esac])
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  AC_MSG_RESULT($MAGIC_CMD)
+else
+  AC_MSG_RESULT(no)
+fi
+])
+
+
+# AC_PATH_MAGIC - find a file program which can recognise a shared library
+# Tries the tool-prefixed name first, then plain file when a tool prefix is
+# set; otherwise MAGIC_CMD falls back to the no-op command ":".
+AC_DEFUN([AC_PATH_MAGIC],
+[AC_REQUIRE([AC_CHECK_TOOL_PREFIX])dnl
+AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin:$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    AC_PATH_TOOL_PREFIX(file, /usr/bin:$PATH)
+  else
+    MAGIC_CMD=:
+  fi
+fi
+])
+
+
+# AC_PROG_LD - find the path to the GNU or non-GNU linker
+AC_DEFUN([AC_PROG_LD],
+[AC_ARG_WITH(gnu-ld,
+[ --with-gnu-ld assume the C compiler uses GNU ld [default=no]],
+test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no)
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a
path. + AC_MSG_CHECKING([for ld used by GCC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]]* | [[A-Za-z]]:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' + # Canonicalize the path of ld + ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` + while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do + ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(lt_cv_path_LD, +[if test -z "$LD"; then + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some GNU ld's only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + if "$lt_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then + test "$with_gnu_ld" != no && break + else + test "$with_gnu_ld" != yes && break + fi + fi + done + IFS="$ac_save_ifs" +else + lt_cv_path_LD="$LD" # Let the user override the test with a path. 
+fi])
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  AC_MSG_RESULT($LD)
+else
+  AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+AC_PROG_LD_GNU
+])
+
+# AC_PROG_LD_GNU - check whether the linker named by LD is GNU ld
+# The answer is cached in lt_cv_prog_gnu_ld and then copied to with_gnu_ld.
+AC_DEFUN([AC_PROG_LD_GNU],
+[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU ld's only accept -v.
+if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
+  lt_cv_prog_gnu_ld=yes
+else
+  lt_cv_prog_gnu_ld=no
+fi])
+with_gnu_ld=$lt_cv_prog_gnu_ld
+])
+
+# AC_PROG_LD_RELOAD_FLAG - find reload flag for linker
+# -- PORTME Some linkers may need a different reload flag.
+AC_DEFUN([AC_PROG_LD_RELOAD_FLAG],
+[AC_CACHE_CHECK([for $LD option to reload object files], lt_cv_ld_reload_flag,
+[lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+test -n "$reload_flag" && reload_flag=" $reload_flag"
+])
+
+# AC_DEPLIBS_CHECK_METHOD - how to check for library dependencies
+# -- PORTME fill in with the dynamic library characteristics
+AC_DEFUN([AC_DEPLIBS_CHECK_METHOD],
+[AC_CACHE_CHECK([how to recognise dependent libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given egrep regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+ +case $host_os in +aix4* | aix5*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi4*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin* | mingw* | pw32*) + lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method='file_magic Mach-O dynamically linked shared library' + lt_cv_file_magic_cmd='/usr/bin/file -L' + case "$host_os" in + rhapsody* | darwin1.[[012]]) + lt_cv_file_magic_test_file=`echo /System/Library/Frameworks/System.framework/Versions/*/System | head -1` + ;; + *) # Darwin 1.3 on + lt_cv_file_magic_test_file='/usr/lib/libSystem.dylib' + ;; + esac + ;; + +freebsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20*|hpux11*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]].[[0-9]]) shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + irix5* | nonstopux*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1" + ;; + *) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[[1234]] dynamic lib MIPS - version 1" + ;; + esac + lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*` + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be Linux ELF. 
+linux-gnu*) + case $host_cpu in + alpha* | hppa* | i*86 | mips | mipsel | powerpc* | sparc* | ia64* | s390* | x86_64*) + lt_cv_deplibs_check_method=pass_all ;; + *) + # glibc up to 2.1.1 does not perform some relocations on ARM + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' ;; + esac + lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so` + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so\.[[0-9]]+\.[[0-9]]+$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/\.]]+\.so$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +openbsd*) + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB shared object' + else + lt_cv_deplibs_check_method='file_magic OpenBSD.* shared library' + fi + ;; + +osf3* | osf4* | osf5*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method='file_magic COFF format alpha shared library' + lt_cv_file_magic_test_file=/shlib/libc.so + lt_cv_deplibs_check_method=pass_all + ;; + +sco3.2v5*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + lt_cv_file_magic_test_file=/lib/libc.so + ;; + +sysv5uw[[78]]* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo 
/usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; +esac +]) +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +]) + + +# AC_PROG_NM - find the path to a BSD-compatible name lister +AC_DEFUN([AC_PROG_NM], +[AC_REQUIRE([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR])dnl +AC_MSG_CHECKING([for BSD-compatible nm]) +AC_CACHE_VAL(lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM="$NM" +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/${ac_tool_prefix}nm + if test -f $tmp_nm || test -f $tmp_nm$ac_exeext ; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + if ($tmp_nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep '(/dev/null|Invalid file or object type)' >/dev/null; then + lt_cv_path_NM="$tmp_nm -B" + break + elif ($tmp_nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then + lt_cv_path_NM="$tmp_nm -p" + break + else + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + fi + fi + done + IFS="$ac_save_ifs" + test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm +fi]) +NM="$lt_cv_path_NM" +AC_MSG_RESULT([$NM]) +]) + +# AC_CHECK_LIBM - check for math library +AC_DEFUN([AC_CHECK_LIBM], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cygwin* | *-*-pw32*) + # These system don't have libm + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") + AC_CHECK_LIB(m, main, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, main, LIBM="-lm") + ;; +esac +]) + +# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for +# the libltdl convenience library and LTDLINCL to the include flags for +# the libltdl header and adds --enable-ltdl-convenience to the +# configure arguments. Note that LIBLTDL and LTDLINCL are not +# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not +# provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed +# with '${top_builddir}/' and LTDLINCL will be prefixed with +# '${top_srcdir}/' (note the single quotes!). If your package is not +# flat and you're not using automake, define top_builddir and +# top_srcdir appropriately in the Makefiles. 
+AC_DEFUN([AC_LIBLTDL_CONVENIENCE], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl + case $enable_ltdl_convenience in + no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;; + "") enable_ltdl_convenience=yes + ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;; + esac + LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la + LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) + # For backwards non-gettext consistent compatibility... + INCLTDL="$LTDLINCL" +]) + +# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for +# the libltdl installable library and LTDLINCL to the include flags for +# the libltdl header and adds --enable-ltdl-install to the configure +# arguments. Note that LIBLTDL and LTDLINCL are not AC_SUBSTed, nor is +# AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed +# libltdl is not found, it is assumed to be `libltdl'. LIBLTDL will +# be prefixed with '${top_builddir}/' and LTDLINCL will be prefixed +# with '${top_srcdir}/' (note the single quotes!). If your package is +# not flat and you're not using automake, define top_builddir and +# top_srcdir appropriately in the Makefiles. +# In the future, this macro may have to be called after AC_PROG_LIBTOOL. +AC_DEFUN([AC_LIBLTDL_INSTALLABLE], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl + AC_CHECK_LIB(ltdl, main, + [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no], + [if test x"$enable_ltdl_install" = xno; then + AC_MSG_WARN([libltdl not installed, but installation disabled]) + else + enable_ltdl_install=yes + fi + ]) + if test x"$enable_ltdl_install" = x"yes"; then + ac_configure_args="$ac_configure_args --enable-ltdl-install" + LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la + LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) + else + ac_configure_args="$ac_configure_args --enable-ltdl-install=no" + LIBLTDL="-lltdl" + LTDLINCL= + fi + # For backwards non-gettext consistent compatibility... 
+ INCLTDL="$LTDLINCL" +]) + +# old names +AC_DEFUN([AM_PROG_LIBTOOL], [AC_PROG_LIBTOOL]) +AC_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AC_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AC_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) +AC_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) +AC_DEFUN([AM_PROG_LD], [AC_PROG_LD]) +AC_DEFUN([AM_PROG_NM], [AC_PROG_NM]) + +# This is just to silence aclocal about the macro not being used +ifelse([AC_DISABLE_FAST_INSTALL]) + +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. # +# LT_AC_PROG_SED +# -------------- +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +AC_DEFUN([LT_AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_executable_p="test -f" +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + _sed_list="$_sed_list $as_dir/$ac_prog$ac_exec_ext" + fi + done + done +done + + # Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. 
+: ${TMPDIR=/tmp} +{ + tmp=`(umask 077 && mktemp -d -q "$TMPDIR/sedXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=$TMPDIR/sed$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in $TMPDIR" >&2 + { (exit 1); exit 1; } +} + _max=0 + _count=0 + # Add /usr/xpg4/bin/sed as it is typically found on Solaris + # along with /bin/sed that truncates output. + for _sed in $_sed_list /usr/xpg4/bin/sed; do + test ! -f ${_sed} && break + cat /dev/null > "$tmp/sed.in" + _count=0 + echo ${ECHO_N-$ac_n} "0123456789${ECHO_C-$ac_c}" >"$tmp/sed.in" + # Check for GNU sed and select it if it is found. + if "${_sed}" --version 2>&1 < /dev/null | egrep '(GNU)' > /dev/null; then + lt_cv_path_SED=${_sed} + break + fi + while true; do + cat "$tmp/sed.in" "$tmp/sed.in" >"$tmp/sed.tmp" + mv "$tmp/sed.tmp" "$tmp/sed.in" + cp "$tmp/sed.in" "$tmp/sed.nl" + echo >>"$tmp/sed.nl" + ${_sed} -e 's/a$//' < "$tmp/sed.nl" >"$tmp/sed.out" || break + cmp -s "$tmp/sed.out" "$tmp/sed.nl" || break + # 40000 chars as input seems more than enough + test $_count -gt 10 && break + _count=`expr $_count + 1` + if test $_count -gt $_max; then + _max=$_count + lt_cv_path_SED=$_sed + fi + done + done + rm -rf "$tmp" +]) +if test "X$SED" != "X"; then + lt_cv_path_SED=$SED +else + SED=$lt_cv_path_SED +fi +AC_MSG_RESULT([$SED]) +]) + diff --git a/altui/README.alt b/altui/README.alt new file mode 100644 index 0000000..e0f31db --- /dev/null +++ b/altui/README.alt @@ -0,0 +1,71 @@ +Here is an alternate command-line user interface for the IJG JPEG software. +It is designed for use under MS-DOS, and may also be useful on other non-Unix +operating systems. (For that matter, this code works fine on Unix, but the +standard command-line syntax is better on Unix because it is pipe-friendly.) 
+ +With this user interface, cjpeg and djpeg accept multiple input file names +on the command line; output file names are generated by substituting +appropriate extensions. The user is prompted before any already-existing +file will be overwritten. See usage.alt for details. + +Expansion of wild-card file specifications is useful but is not directly +provided by this code. Most DOS C compilers have the ability to do wild-card +expansion "behind the scenes", and we rely on that feature. On other systems, +the shell may do it for you, as is done on Unix. + +Also, a DOS-specific routine is provided to determine available memory; +this makes the -maxmemory switch unnecessary except in unusual cases. +If you know how to determine available memory on a different system, +you can easily add the necessary code. (And please send it along to +jpeg-info@uunet.uu.net so we can include it in future releases!) + + +INSTALLATION +============ + +You need to have the main IJG JPEG distribution, release 6 or later. +Replace the standard cjpeg.c and djpeg.c files with the ones provided here. +Then build the software as described in the main distribution's install.doc +file, with these exceptions: + +* Define PROGRESS_REPORT in jconfig.h if you want the percent-done display. +* Define NO_OVERWRITE_CHECK if you *don't* want overwrite confirmation. +* You may ignore the USE_SETMODE and TWO_FILE_COMMANDLINE symbols discussed + in install.doc; these files do not use them. +* As given, djpeg.c defaults to GIF output (not PPM output as in the standard + djpeg.c). If you want something different, modify DEFAULT_FMT. + +You may also need to do something special to enable filename wild-card +expansion, assuming your compiler has that capability at all. + +Modify the standard usage.doc file as described in usage.alt. (If you want +to use the Unix-style manual pages cjpeg.1 and djpeg.1, better fix them too.) 
+ + +Here are some specific notes for popular MS-DOS compilers: + +Borland C: + Add "-DMSDOS" to CFLAGS to enable use of the DOS memory determination code. + Link with the standard library file WILDARGS.OBJ to get wild-card expansion. + +Microsoft C: + Add "-DMSDOS" to CFLAGS to enable use of the DOS memory determination code. + Link with the standard library file SETARGV.OBJ to get wild-card expansion. + In the versions I've used, you must also add /NOE to the linker switches to + avoid a duplicate-symbol error from including SETARGV. + +DJGPP (we recommend version 2.0 or later): + Add "-DFREE_MEM_ESTIMATE=0" to CFLAGS. Wild-card expansion is automatic. + + +LEGAL ISSUES +============ + +This software is copyright (C) 1991-1998, Thomas G. Lane. +Terms of distribution and use are the same as for the free IJG JPEG software; +see its README file for details. + +The authors make NO WARRANTY or representation, either express or implied, +with respect to this software, its quality, accuracy, merchantability, or +fitness for a particular purpose. This software is provided "AS IS", and you, +its user, assume the entire risk as to its quality and accuracy. diff --git a/altui/cjpeg.c b/altui/cjpeg.c new file mode 100644 index 0000000..df1a4f8 --- /dev/null +++ b/altui/cjpeg.c @@ -0,0 +1,813 @@ +/* + * alternate cjpeg.c + * + * Copyright (C) 1991-1998, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : January 6, 2006 + * --------------------------------------------------------------------- + * + * This file contains an alternate user interface for the JPEG compressor. 
+ * One or more input files are named on the command line, and output file + * names are created by substituting ".jpg" for the input file's extension. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jversion.h" /* for version message */ + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include /* Metrowerks needs this */ +#include /* ... and this */ +#endif +#ifdef THINK_C +#include /* Think declares it here */ +#endif +#endif + +#ifndef PATH_MAX /* ANSI maximum-pathname-length constant */ +#define PATH_MAX 256 +#endif + + +/* Create the add-on message string table. */ + +#define JMESSAGE(code,string) string , + +static const char * const cdjpeg_message_table[] = { +#include "cderror.h" + NULL +}; + + +/* + * SIMD Ext: compiler-specific hacks to enable filename wild-card expansion + */ + +#ifdef _MSC_VER /* Microsoft Visual C++ */ +/* from setargv.c (setargv.obj) */ +/* Tested under Visual C++ V6.0, Toolkit 2003, and 2005 Express Edition */ +int __cdecl _setargv(void) { int __cdecl __setargv(void); return __setargv(); } +#endif +#ifdef __BORLANDC__ /* Borland C++ */ +/* from wildargs.c (wildargs.obj) */ +/* Tested under Borland C++ Compiler 5.5 (win32) */ +#include +typedef void _RTLENTRY (* _RTLENTRY _argv_expand_fnc)(char *, _PFN_ADDARG); +_argv_expand_fnc _argv_expand_ptr = _expand_wild; +#endif + + +/* + * Automatic determination of available memory. 
+ */ + +static long default_maxmem; /* saves value determined at startup, or 0 */ + +#ifndef FREE_MEM_ESTIMATE /* may be defined from command line */ + +#ifdef MSDOS /* For MS-DOS (unless flat-memory model) */ + +#include <dos.h> /* for access to intdos() call */ + +LOCAL(long) +unused_dos_memory (void) +/* Obtain total amount of unallocated DOS memory */ +{ + union REGS regs; + long nparas; + + regs.h.ah = 0x48; /* DOS function Allocate Memory Block */ + regs.x.bx = 0xFFFF; /* Ask for more memory than DOS can have */ + (void) intdos(&regs, &regs); + /* DOS will fail and return # of paragraphs actually available in BX. */ + nparas = (unsigned int) regs.x.bx; + /* Times 16 to convert to bytes. */ + return nparas << 4; +} + +/* The default memory setting is 95% of the available space. */ +#define FREE_MEM_ESTIMATE ((unused_dos_memory() * 95L) / 100L) + +#endif /* MSDOS */ + +#ifdef ATARI /* For Atari ST/STE/TT, Pure C or Turbo C */ + +#include <ext.h> + +/* The default memory setting is 90% of the available space. */ +#define FREE_MEM_ESTIMATE (((long) coreleft() * 90L) / 100L) + +#endif /* ATARI */ + +/* Add memory-estimation procedures for other operating systems here, + * with appropriate #ifdef's around them. + */ + +#endif /* !FREE_MEM_ESTIMATE */ + + +/* + * This routine determines what format the input file is, + * and selects the appropriate input-reading module. + * + * To determine which family of input formats the file belongs to, + * we may look only at the first byte of the file, since C does not + * guarantee that more than one character can be pushed back with ungetc. 
+ * Looking at additional bytes would require one of these approaches: + * 1) assume we can fseek() the input file (fails for piped input); + * 2) assume we can push back more than one character (works in + * some C implementations, but unportable); + * 3) provide our own buffering (breaks input readers that want to use + * stdio directly, such as the RLE library); + * or 4) don't put back the data, and modify the input_init methods to assume + * they start reading after the start of file (also breaks RLE library). + * #1 is attractive for MS-DOS but is untenable on Unix. + * + * The most portable solution for file types that can't be identified by their + * first byte is to make the user tell us what they are. This is also the + * only approach for "raw" file types that contain only arbitrary values. + * We presently apply this method for Targa files. Most of the time Targa + * files start with 0x00, so we recognize that case. Potentially, however, + * a Targa file could start with any byte value (byte 0 is the length of the + * seldom-used ID field), so we provide a switch to force Targa input mode. 
+ */ + +static boolean is_targa; /* records user -targa switch */ + + +LOCAL(cjpeg_source_ptr) +select_file_type (j_compress_ptr cinfo, FILE * infile) +{ + int c; + + if (is_targa) { +#ifdef TARGA_SUPPORTED + return jinit_read_targa(cinfo); +#else + ERREXIT(cinfo, JERR_TGA_NOTCOMP); +#endif + } + + if ((c = getc(infile)) == EOF) + ERREXIT(cinfo, JERR_INPUT_EMPTY); + if (ungetc(c, infile) == EOF) + ERREXIT(cinfo, JERR_UNGETC_FAILED); + + switch (c) { +#ifdef BMP_SUPPORTED + case 'B': + return jinit_read_bmp(cinfo); +#endif +#ifdef GIF_SUPPORTED + case 'G': + return jinit_read_gif(cinfo); +#endif +#ifdef PPM_SUPPORTED + case 'P': + return jinit_read_ppm(cinfo); +#endif +#ifdef RLE_SUPPORTED + case 'R': + return jinit_read_rle(cinfo); +#endif +#ifdef TARGA_SUPPORTED + case 0x00: + return jinit_read_targa(cinfo); +#endif + default: + ERREXIT(cinfo, JERR_UNKNOWN_FORMAT); + break; + } + + return NULL; /* suppress compiler warnings */ +} + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. 
+ */ + + +static const char * progname; /* program name for error messages */ +static char * outfilename; /* for -outfile switch */ + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] inputfile(s)\n", progname); + fprintf(stderr, "List of input files may use wildcards (* and ?)\n"); + fprintf(stderr, "Output filename is same as input filename, but extension .jpg\n"); + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -quality N Compression quality (0..100; 5-95 is useful range)\n"); + fprintf(stderr, " -grayscale Create monochrome JPEG file\n"); +#ifdef ENTROPY_OPT_SUPPORTED + fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n"); +#endif +#ifdef C_PROGRESSIVE_SUPPORTED + fprintf(stderr, " -progressive Create progressive JPEG file\n"); +#endif +#ifdef TARGA_SUPPORTED + fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n"); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef DCT_ISLOW_SUPPORTED + fprintf(stderr, " -dct int Use integer DCT method%s\n", + (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : "")); +#endif +#ifdef DCT_IFAST_SUPPORTED + fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n", + (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : "")); +#endif +#ifdef DCT_FLOAT_SUPPORTED + fprintf(stderr, " -dct float Use floating-point DCT method%s\n", + (JDCT_DEFAULT == JDCT_FLOAT ? 
" (default)" : "")); +#endif + fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); +#ifdef INPUT_SMOOTHING_SUPPORTED + fprintf(stderr, " -smooth N Smooth dithered input (N=1..100 is strength)\n"); +#endif +#ifndef FREE_MEM_ESTIMATE + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); +#endif + fprintf(stderr, " -outfile name Specify name for output file\n"); + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + fprintf(stderr, "Switches for wizards:\n"); +#ifdef C_ARITH_CODING_SUPPORTED + fprintf(stderr, " -arithmetic Use arithmetic coding\n"); +#endif + fprintf(stderr, " -baseline Force baseline quantization tables\n"); + fprintf(stderr, " -qtables file Use quantization tables given in file\n"); + fprintf(stderr, " -qslots N[,...] Set component quantization tables\n"); + fprintf(stderr, " -sample HxV[,...] Set component sampling factors\n"); +#ifdef C_MULTISCAN_FILES_SUPPORTED + fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n"); +#endif + exit(EXIT_FAILURE); +} + + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +LOCAL(void) +print_simd_info (FILE * file, char * labelstr, unsigned int simd) +{ + fprintf(file, "%s%s%s%s%s%s\n", labelstr, + simd & JSIMD_MMX ? " MMX" : "", + simd & JSIMD_3DNOW ? " 3DNow!" : "", + simd & JSIMD_SSE ? " SSE" : "", + simd & JSIMD_SSE2 ? " SSE2" : "", + simd == JSIMD_NONE ? " NONE" : ""); +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ + + +LOCAL(int) +parse_switches (j_compress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) + * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. 
+ */ +{ + int argn; + char * arg; + int quality; /* -quality parameter */ + int q_scale_factor; /* scaling percentage for -qtables */ + boolean force_baseline; + boolean simple_progressive; + char * qtablefile = NULL; /* saves -qtables filename if any */ + char * qslotsarg = NULL; /* saves -qslots parm if any */ + char * samplearg = NULL; /* saves -sample parm if any */ + char * scansarg = NULL; /* saves -scans parm if any */ + + /* Set up default JPEG parameters. */ + /* Note that default -quality level need not, and does not, + * match the default scaling for an explicit -qtables argument. + */ + quality = 75; /* default -quality value */ + q_scale_factor = 100; /* default to no scaling for -qtables */ + force_baseline = FALSE; /* by default, allow 16-bit quantizers */ + simple_progressive = FALSE; + is_targa = FALSE; + outfilename = NULL; + cinfo->err->trace_level = 0; + if (default_maxmem > 0) /* override library's default value */ + cinfo->mem->max_memory_to_use = default_maxmem; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "arithmetic", 1)) { + /* Use arithmetic coding. */ +#ifdef C_ARITH_CODING_SUPPORTED + cinfo->arith_code = TRUE; +#else + fprintf(stderr, "%s: sorry, arithmetic coding not supported\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "baseline", 1)) { + /* Force baseline-compatible output (8-bit quantizer values). 
*/ + force_baseline = TRUE; + +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED + } else if (keymatch(arg, "nosimd" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL); + } else if (keymatch(arg, "nommx" , 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX); + } else if (keymatch(arg, "no3dnow", 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW); + } else if (keymatch(arg, "nosse" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE); + } else if (keymatch(arg, "nosse2" , 6)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2); +#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */ + + } else if (keymatch(arg, "dct", 2)) { + /* Select DCT algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "int", 1)) { + cinfo->dct_method = JDCT_ISLOW; + } else if (keymatch(argv[argn], "fast", 2)) { + cinfo->dct_method = JDCT_IFAST; + } else if (keymatch(argv[argn], "float", 2)) { + cinfo->dct_method = JDCT_FLOAT; + } else + usage(); + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! 
printed_version) { + fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n", + JVERSION, JCOPYRIGHT); + fprintf(stderr, + "\nx86 SIMD extension for IJG JPEG library, version %s\n\n", + JPEG_SIMDEXT_VER_STR); +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + print_simd_info(stderr, "SIMD instructions supported by the system :", + jpeg_simd_support(NULL)); + + fprintf(stderr, "\n === SIMD Operation Modes ===\n"); +#ifdef DCT_ISLOW_SUPPORTED + print_simd_info(stderr, "Accurate integer DCT (-dct int) :", + jpeg_simd_forward_dct(cinfo, JDCT_ISLOW)); +#endif +#ifdef DCT_IFAST_SUPPORTED + print_simd_info(stderr, "Fast integer DCT (-dct fast) :", + jpeg_simd_forward_dct(cinfo, JDCT_IFAST)); +#endif +#ifdef DCT_FLOAT_SUPPORTED + print_simd_info(stderr, "Floating-point DCT (-dct float) :", + jpeg_simd_forward_dct(cinfo, JDCT_FLOAT)); +#endif + print_simd_info(stderr, "Downsampling (-sample 2x2 or 2x1) :", + jpeg_simd_downsampler(cinfo)); + print_simd_info(stderr, "Colorspace conversion (RGB->YCbCr) :", + jpeg_simd_color_converter(cinfo)); + fprintf(stderr, "\n"); +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + /* Force a monochrome JPEG file to be generated. */ + jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { + /* Enable entropy parm optimization. 
*/ +#ifdef ENTROPY_OPT_SUPPORTED + cinfo->optimize_coding = TRUE; +#else + fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "progressive", 1)) { + /* Select simple progressive mode. */ +#ifdef C_PROGRESSIVE_SUPPORTED + simple_progressive = TRUE; + /* We must postpone execution until num_components is known. */ +#else + fprintf(stderr, "%s: sorry, progressive output was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "quality", 1)) { + /* Quality factor (quantization table scaling factor). */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d", &quality) != 1) + usage(); + /* Change scale factor in case -qtables is present. */ + q_scale_factor = jpeg_quality_scaling(quality); + + } else if (keymatch(arg, "qslots", 2)) { + /* Quantization table slot numbers. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qslotsarg = argv[argn]; + /* Must delay setting qslots until after we have processed any + * colorspace-determining switches, since jpeg_set_colorspace sets + * default quant table numbers. + */ + + } else if (keymatch(arg, "qtables", 2)) { + /* Quantization tables fetched from file. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + qtablefile = argv[argn]; + /* We postpone actually reading the file in case -quality comes later. */ + + } else if (keymatch(arg, "restart", 1)) { + /* Restart interval in MCU rows (or in MCUs with 'b'). 
*/ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (lval < 0 || lval > 65535L) + usage(); + if (ch == 'b' || ch == 'B') { + cinfo->restart_interval = (unsigned int) lval; + cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ + } else { + cinfo->restart_in_rows = (int) lval; + /* restart_interval will be computed during startup */ + } + + } else if (keymatch(arg, "sample", 2)) { + /* Set sampling factors. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + samplearg = argv[argn]; + /* Must delay setting sample factors until after we have processed any + * colorspace-determining switches, since jpeg_set_colorspace sets + * default sampling factors. + */ + + } else if (keymatch(arg, "scans", 2)) { + /* Set scan script. */ +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (++argn >= argc) /* advance to next argument */ + usage(); + scansarg = argv[argn]; + /* We must postpone reading the file in case -progressive appears. */ +#else + fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n", + progname); + exit(EXIT_FAILURE); +#endif + + } else if (keymatch(arg, "smooth", 2)) { + /* Set input smoothing factor. */ + int val; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d", &val) != 1) + usage(); + if (val < 0 || val > 100) + usage(); + cinfo->smoothing_factor = val; + + } else if (keymatch(arg, "targa", 1)) { + /* Input file is Targa format. */ + is_targa = TRUE; + + } else { + usage(); /* bogus switch */ + } + } + + /* Post-switch-scanning cleanup */ + + if (for_real) { + + /* Set quantization tables for selected quality. */ + /* Some or all may be overridden if -qtables is present. */ + jpeg_set_quality(cinfo, quality, force_baseline); + + if (qtablefile != NULL) /* process -qtables if it was present */ + if (! 
read_quant_tables(cinfo, qtablefile, + q_scale_factor, force_baseline)) + usage(); + + if (qslotsarg != NULL) /* process -qslots if it was present */ + if (! set_quant_slots(cinfo, qslotsarg)) + usage(); + + if (samplearg != NULL) /* process -sample if it was present */ + if (! set_sample_factors(cinfo, samplearg)) + usage(); + +#ifdef C_PROGRESSIVE_SUPPORTED + if (simple_progressive) /* process -progressive; -scans can override */ + jpeg_simple_progression(cinfo); +#endif + +#ifdef C_MULTISCAN_FILES_SUPPORTED + if (scansarg != NULL) /* process -scans if it was present */ + if (! read_scan_script(cinfo, scansarg)) + usage(); +#endif + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * Check for overwrite of an existing file; clear it with user + */ + +#ifndef NO_OVERWRITE_CHECK + +LOCAL(boolean) +is_write_ok (char * outfname) +{ + FILE * ofile; + int ch; + + ofile = fopen(outfname, READ_BINARY); + if (ofile == NULL) + return TRUE; /* not present */ + fclose(ofile); /* oops, it is present */ + + for (;;) { + fprintf(stderr, "%s already exists, overwrite it? [y/n] ", + outfname); + fflush(stderr); + ch = getc(stdin); + if (ch != '\n') /* flush rest of line */ + while (getc(stdin) != '\n') + /* nothing */; + + switch (ch) { + case 'Y': + case 'y': + return TRUE; + case 'N': + case 'n': + return FALSE; + /* otherwise, ask again */ + } + } +} + +#endif + + +/* + * Process a single input file name, and return its index in argv[]. + * File names at or to left of old_file_index have been processed already. 
+ */ + +LOCAL(int) +process_one_file (int argc, char **argv, int old_file_index) +{ + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + char *infilename; + char workfilename[PATH_MAX]; +#ifdef PROGRESS_REPORT + struct cdjpeg_progress_mgr progress; +#endif + int file_index; + cjpeg_source_ptr src_mgr; + FILE * input_file = NULL; + FILE * output_file = NULL; + JDIMENSION num_scanlines; + + /* Initialize the JPEG compression object with default error handling. */ + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + /* Add some application-specific error messages (from cderror.h) */ + jerr.addon_message_table = cdjpeg_message_table; + jerr.first_addon_message = JMSG_FIRSTADDONCODE; + jerr.last_addon_message = JMSG_LASTADDONCODE; + + /* Now safe to enable signal catcher. */ +#ifdef NEED_SIGNAL_CATCHER + enable_signal_catcher((j_common_ptr) &cinfo); +#endif + + /* Initialize JPEG parameters. + * Much of this may be overridden later. + * In particular, we don't yet know the input file's color space, + * but we need to provide some value for jpeg_set_defaults() to work. + */ + + cinfo.in_color_space = JCS_RGB; /* arbitrary guess */ + jpeg_set_defaults(&cinfo); + + /* Scan command line to find next file name. + * It is convenient to use just one switch-parsing routine, but the switch + * values read here are ignored; we will rescan the switches after opening + * the input file. + */ + + file_index = parse_switches(&cinfo, argc, argv, old_file_index, FALSE); + if (file_index >= argc) { + fprintf(stderr, "%s: missing input file name\n", progname); + usage(); + } + + /* Open the input file. */ + infilename = argv[file_index]; + if ((input_file = fopen(infilename, READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, infilename); + goto fail; + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &cinfo, &progress); +#endif + + /* Figure out the input file format, and set up to read it. 
*/ + src_mgr = select_file_type(&cinfo, input_file); + src_mgr->input_file = input_file; + + /* Read the input file header to obtain file size & colorspace. */ + (*src_mgr->start_input) (&cinfo, src_mgr); + + /* Now that we know input colorspace, fix colorspace-dependent defaults */ + jpeg_default_colorspace(&cinfo); + + /* Adjust default compression parameters by re-parsing the options */ + file_index = parse_switches(&cinfo, argc, argv, old_file_index, TRUE); + + /* If user didn't supply -outfile switch, select output file name. */ + if (outfilename == NULL) { + int i; + + outfilename = workfilename; + /* Make outfilename be infilename with .jpg substituted for extension */ + strcpy(outfilename, infilename); + for (i = strlen(outfilename)-1; i >= 0; i--) { + switch (outfilename[i]) { + case ':': + case '/': + case '\\': + i = 0; /* stop scanning */ + break; + case '.': + outfilename[i] = '\0'; /* lop off existing extension */ + i = 0; /* stop scanning */ + break; + default: + break; /* keep scanning */ + } + } + strcat(outfilename, ".jpg"); + } + + fprintf(stderr, "Compressing %s => %s\n", infilename, outfilename); +#ifndef NO_OVERWRITE_CHECK + if (! is_write_ok(outfilename)) + goto fail; +#endif + + /* Open the output file. 
*/ + if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't create %s\n", progname, outfilename); + goto fail; + } + + /* Specify data destination for compression */ + jpeg_stdio_dest(&cinfo, output_file); + + /* Start compressor */ + jpeg_start_compress(&cinfo, TRUE); + + /* Process data */ + while (cinfo.next_scanline < cinfo.image_height) { + num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr); + (void) jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines); + } + + /* Finish compression and release memory */ + (*src_mgr->finish_input) (&cinfo, src_mgr); + jpeg_finish_compress(&cinfo); + + /* Clean up and exit */ +fail: + jpeg_destroy_compress(&cinfo); + + if (input_file != NULL) fclose(input_file); + if (output_file != NULL) fclose(output_file); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &cinfo); +#endif + + /* Disable signal catcher. */ +#ifdef NEED_SIGNAL_CATCHER + enable_signal_catcher((j_common_ptr) NULL); +#endif + + return file_index; +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + int file_index; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + +#ifdef MSDOS + progname = "cjpeg"; /* DOS tends to be too verbose about argv[0] */ +#else + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "cjpeg"; /* in case C library doesn't provide it */ +#endif + + /* The default maxmem must be computed only once at program startup, + * since releasing memory with free() won't give it back to the OS. + */ +#ifdef FREE_MEM_ESTIMATE + default_maxmem = FREE_MEM_ESTIMATE; +#else + default_maxmem = 0; +#endif + + /* Scan command line, parse switches and locate input file names */ + + if (argc < 2) + usage(); /* nothing on the command line?? */ + + file_index = 0; + + while (file_index < argc-1) + file_index = process_one_file(argc, argv, file_index); + + /* All done. 
*/ + exit(EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff --git a/altui/djpeg.c b/altui/djpeg.c new file mode 100644 index 0000000..a000d45 --- /dev/null +++ b/altui/djpeg.c @@ -0,0 +1,836 @@ +/* + * alternate djpeg.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : January 6, 2006 + * --------------------------------------------------------------------- + * + * This file contains an alternate user interface for the JPEG decompressor. + * One or more input files are named on the command line, and output file + * names are created by substituting an appropriate extension. + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ +#include "jversion.h" /* for version message */ + +#include <ctype.h> /* to declare isprint() */ + +#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ +#ifdef __MWERKS__ +#include <SIOUX.h> /* Metrowerks needs this */ +#include <console.h> /* ... and this */ +#endif +#ifdef THINK_C +#include <console.h> /* Think declares it here */ +#endif +#endif + +#ifndef PATH_MAX /* ANSI maximum-pathname-length constant */ +#define PATH_MAX 256 +#endif + + +/* Create the add-on message string table. 
*/ + +#define JMESSAGE(code,string) string , + +static const char * const cdjpeg_message_table[] = { +#include "cderror.h" + NULL +}; + + +/* + * SIMD Ext: compiler-specific hacks to enable filename wild-card expansion + */ + +#ifdef _MSC_VER /* Microsoft Visual C++ */ +/* from setargv.c (setargv.obj) */ +/* Tested under Visual C++ V6.0, Toolkit 2003, and 2005 Express Edition */ +int __cdecl _setargv(void) { int __cdecl __setargv(void); return __setargv(); } +#endif +#ifdef __BORLANDC__ /* Borland C++ */ +/* from wildargs.c (wildargs.obj) */ +/* Tested under Borland C++ Compiler 5.5 (win32) */ +#include <wildargs.h> +typedef void _RTLENTRY (* _RTLENTRY _argv_expand_fnc)(char *, _PFN_ADDARG); +_argv_expand_fnc _argv_expand_ptr = _expand_wild; +#endif + + +/* + * Automatic determination of available memory. + */ + +static long default_maxmem; /* saves value determined at startup, or 0 */ + +#ifndef FREE_MEM_ESTIMATE /* may be defined from command line */ + +#ifdef MSDOS /* For MS-DOS (unless flat-memory model) */ + +#include <dos.h> /* for access to intdos() call */ + +LOCAL(long) +unused_dos_memory (void) +/* Obtain total amount of unallocated DOS memory */ +{ + union REGS regs; + long nparas; + + regs.h.ah = 0x48; /* DOS function Allocate Memory Block */ + regs.x.bx = 0xFFFF; /* Ask for more memory than DOS can have */ + (void) intdos(&regs, &regs); + /* DOS will fail and return # of paragraphs actually available in BX. */ + nparas = (unsigned int) regs.x.bx; + /* Times 16 to convert to bytes. */ + return nparas << 4; +} + +/* The default memory setting is 95% of the available space. */ +#define FREE_MEM_ESTIMATE ((unused_dos_memory() * 95L) / 100L) + +#endif /* MSDOS */ + +#ifdef ATARI /* For Atari ST/STE/TT, Pure C or Turbo C */ + +#include <ext.h> + +/* The default memory setting is 90% of the available space. 
*/ +#define FREE_MEM_ESTIMATE (((long) coreleft() * 90L) / 100L) + +#endif /* ATARI */ + +/* Add memory-estimation procedures for other operating systems here, + * with appropriate #ifdef's around them. + */ + +#endif /* !FREE_MEM_ESTIMATE */ + + +/* + * This list defines the known output image formats + * (not all of which need be supported by a given version). + * You can change the default output format by defining DEFAULT_FMT; + * indeed, you had better do so if you undefine PPM_SUPPORTED. + */ + +typedef enum { + FMT_BMP, /* BMP format (Windows flavor) */ + FMT_GIF, /* GIF format */ + FMT_OS2, /* BMP format (OS/2 flavor) */ + FMT_PPM, /* PPM/PGM (PBMPLUS formats) */ + FMT_RLE, /* RLE format */ + FMT_TARGA, /* Targa format */ + FMT_TIFF /* TIFF format */ +} IMAGE_FORMATS; + +#ifndef DEFAULT_FMT /* so can override from CFLAGS in Makefile */ +#define DEFAULT_FMT FMT_GIF +#endif + +static IMAGE_FORMATS requested_fmt; + + +/* + * Argument-parsing code. + * The switch parser is designed to be useful with DOS-style command line + * syntax, ie, intermixed switches and file names, where only the switches + * to the left of a given file name affect processing of that file. 
+ */ + + +static const char * progname; /* program name for error messages */ +static char * outfilename; /* for -outfile switch */ + + +LOCAL(void) +usage (void) +/* complain about bad command line */ +{ + fprintf(stderr, "usage: %s [switches] inputfile(s)\n", progname); + fprintf(stderr, "List of input files may use wildcards (* and ?)\n"); + fprintf(stderr, "Output filename is same as input filename except for extension\n"); + + fprintf(stderr, "Switches (names may be abbreviated):\n"); + fprintf(stderr, " -colors N Reduce image to no more than N colors\n"); + fprintf(stderr, " -fast Fast, low-quality processing\n"); + fprintf(stderr, " -grayscale Force grayscale output\n"); +#ifdef IDCT_SCALING_SUPPORTED + fprintf(stderr, " -scale M/N Scale output image by fraction M/N, eg, 1/8\n"); +#endif +#ifdef BMP_SUPPORTED + fprintf(stderr, " -bmp Select BMP output format (Windows style)%s\n", + (DEFAULT_FMT == FMT_BMP ? " (default)" : "")); +#endif +#ifdef GIF_SUPPORTED + fprintf(stderr, " -gif Select GIF output format%s\n", + (DEFAULT_FMT == FMT_GIF ? " (default)" : "")); +#endif +#ifdef BMP_SUPPORTED + fprintf(stderr, " -os2 Select BMP output format (OS/2 style)%s\n", + (DEFAULT_FMT == FMT_OS2 ? " (default)" : "")); +#endif +#ifdef PPM_SUPPORTED + fprintf(stderr, " -pnm Select PBMPLUS (PPM/PGM) output format%s\n", + (DEFAULT_FMT == FMT_PPM ? " (default)" : "")); +#endif +#ifdef RLE_SUPPORTED + fprintf(stderr, " -rle Select Utah RLE output format%s\n", + (DEFAULT_FMT == FMT_RLE ? " (default)" : "")); +#endif +#ifdef TARGA_SUPPORTED + fprintf(stderr, " -targa Select Targa output format%s\n", + (DEFAULT_FMT == FMT_TARGA ? " (default)" : "")); +#endif + fprintf(stderr, "Switches for advanced users:\n"); +#ifdef DCT_ISLOW_SUPPORTED + fprintf(stderr, " -dct int Use integer DCT method%s\n", + (JDCT_DEFAULT == JDCT_ISLOW ? 
" (default)" : "")); +#endif +#ifdef DCT_IFAST_SUPPORTED + fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n", + (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : "")); +#endif +#ifdef DCT_FLOAT_SUPPORTED + fprintf(stderr, " -dct float Use floating-point DCT method%s\n", + (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : "")); +#endif + fprintf(stderr, " -dither fs Use F-S dithering (default)\n"); + fprintf(stderr, " -dither none Don't use dithering in quantization\n"); + fprintf(stderr, " -dither ordered Use ordered dither (medium speed, quality)\n"); +#ifdef QUANT_2PASS_SUPPORTED + fprintf(stderr, " -map FILE Map to colors used in named image file\n"); +#endif + fprintf(stderr, " -nosmooth Don't use high-quality upsampling\n"); +#ifdef QUANT_1PASS_SUPPORTED + fprintf(stderr, " -onepass Use 1-pass quantization (fast, low quality)\n"); +#endif +#ifndef FREE_MEM_ESTIMATE + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); +#endif + fprintf(stderr, " -outfile name Specify name for output file\n"); + fprintf(stderr, " -verbose or -debug Emit debug output\n"); + exit(EXIT_FAILURE); +} + + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +LOCAL(void) +print_simd_info (FILE * file, char * labelstr, unsigned int simd) +{ + fprintf(file, "%s%s%s%s%s%s\n", labelstr, + simd & JSIMD_MMX ? " MMX" : "", + simd & JSIMD_3DNOW ? " 3DNow!" : "", + simd & JSIMD_SSE ? " SSE" : "", + simd & JSIMD_SSE2 ? " SSE2" : "", + simd == JSIMD_NONE ? " NONE" : ""); +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ + + +LOCAL(int) +parse_switches (j_decompress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) +/* Parse optional switches. + * Returns argv[] index of first file-name argument (== argc if none). + * Any file names with indexes <= last_file_arg_seen are ignored; + * they have presumably been processed in a previous iteration. + * (Pass 0 for last_file_arg_seen on the first or only iteration.) 
+ * for_real is FALSE on the first (dummy) pass; we may skip any expensive + * processing. + */ +{ + int argn; + char * arg; + + /* Set up default JPEG parameters. */ + requested_fmt = DEFAULT_FMT; /* set default output file format */ + outfilename = NULL; + cinfo->err->trace_level = 0; + if (default_maxmem > 0) /* override library's default value */ + cinfo->mem->max_memory_to_use = default_maxmem; + + /* Scan command line options, adjust parameters */ + + for (argn = 1; argn < argc; argn++) { + arg = argv[argn]; + if (*arg != '-') { + /* Not a switch, must be a file name argument */ + if (argn <= last_file_arg_seen) { + outfilename = NULL; /* -outfile applies to just one input file */ + continue; /* ignore this name if previously processed */ + } + break; /* else done parsing switches */ + } + arg++; /* advance past switch marker character */ + + if (keymatch(arg, "bmp", 1)) { + /* BMP output format. */ + requested_fmt = FMT_BMP; + + } else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) || + keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) { + /* Do color quantization. 
*/ + int val; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d", &val) != 1) + usage(); + cinfo->desired_number_of_colors = val; + cinfo->quantize_colors = TRUE; + +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED + } else if (keymatch(arg, "nosimd" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL); + } else if (keymatch(arg, "nommx" , 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX); + } else if (keymatch(arg, "no3dnow", 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW); + } else if (keymatch(arg, "nosse" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE); + } else if (keymatch(arg, "nosse2" , 6)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2); +#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */ + + } else if (keymatch(arg, "dct", 2)) { + /* Select IDCT algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "int", 1)) { + cinfo->dct_method = JDCT_ISLOW; + } else if (keymatch(argv[argn], "fast", 2)) { + cinfo->dct_method = JDCT_IFAST; + } else if (keymatch(argv[argn], "float", 2)) { + cinfo->dct_method = JDCT_FLOAT; + } else + usage(); + + } else if (keymatch(arg, "dither", 2)) { + /* Select dithering algorithm. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (keymatch(argv[argn], "fs", 2)) { + cinfo->dither_mode = JDITHER_FS; + } else if (keymatch(argv[argn], "none", 2)) { + cinfo->dither_mode = JDITHER_NONE; + } else if (keymatch(argv[argn], "ordered", 2)) { + cinfo->dither_mode = JDITHER_ORDERED; + } else + usage(); + + } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { + /* Enable debug printouts. */ + /* On first -d, print version identification */ + static boolean printed_version = FALSE; + + if (! 
printed_version) { + fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n", + JVERSION, JCOPYRIGHT); + fprintf(stderr, + "\nx86 SIMD extension for IJG JPEG library, version %s\n\n", + JPEG_SIMDEXT_VER_STR); +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + print_simd_info(stderr, "SIMD instructions supported by the system :", + jpeg_simd_support(NULL)); + + fprintf(stderr, "\n === SIMD Operation Modes ===\n"); +#ifdef DCT_ISLOW_SUPPORTED + print_simd_info(stderr, "Accurate integer DCT (-dct int) :", + jpeg_simd_inverse_dct(cinfo, JDCT_ISLOW)); +#endif +#ifdef DCT_IFAST_SUPPORTED + print_simd_info(stderr, "Fast integer DCT (-dct fast) :", + jpeg_simd_inverse_dct(cinfo, JDCT_IFAST)); +#endif +#ifdef DCT_FLOAT_SUPPORTED + print_simd_info(stderr, "Floating-point DCT (-dct float) :", + jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT)); +#endif +#ifdef IDCT_SCALING_SUPPORTED + print_simd_info(stderr, "Reduced-size DCT (-scale M/N) :", + jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT+1)); +#endif + print_simd_info(stderr, "High-quality upsampling (default) :", + jpeg_simd_upsampler(cinfo, TRUE)); + print_simd_info(stderr, "Low-quality upsampling (-nosmooth) :", + jpeg_simd_upsampler(cinfo, FALSE)); + print_simd_info(stderr, "Colorspace conversion (YCbCr->RGB) :", + jpeg_simd_color_deconverter(cinfo)); + fprintf(stderr, "\n"); +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ + printed_version = TRUE; + } + cinfo->err->trace_level++; + + } else if (keymatch(arg, "fast", 1)) { + /* Select recommended processing options for quick-and-dirty output. */ + cinfo->two_pass_quantize = FALSE; + cinfo->dither_mode = JDITHER_ORDERED; + if (! cinfo->quantize_colors) /* don't override an earlier -colors */ + cinfo->desired_number_of_colors = 216; + cinfo->dct_method = JDCT_FASTEST; + cinfo->do_fancy_upsampling = FALSE; + + } else if (keymatch(arg, "gif", 1)) { + /* GIF output format. 
*/ + requested_fmt = FMT_GIF; + + } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + /* Force monochrome output. */ + cinfo->out_color_space = JCS_GRAYSCALE; + + } else if (keymatch(arg, "map", 3)) { + /* Quantize to a color map taken from an input file. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (for_real) { /* too expensive to do twice! */ +#ifdef QUANT_2PASS_SUPPORTED /* otherwise can't quantize to supplied map */ + FILE * mapfile; + + if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]); + exit(EXIT_FAILURE); + } + read_color_map(cinfo, mapfile); + fclose(mapfile); + cinfo->quantize_colors = TRUE; +#else + ERREXIT(cinfo, JERR_NOT_COMPILED); +#endif + } + + } else if (keymatch(arg, "maxmemory", 3)) { + /* Maximum memory in Kb (or Mb with 'm'). */ + long lval; + char ch = 'x'; + + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) + usage(); + if (ch == 'm' || ch == 'M') + lval *= 1000L; + cinfo->mem->max_memory_to_use = lval * 1000L; + + } else if (keymatch(arg, "nosmooth", 3)) { + /* Suppress fancy upsampling */ + cinfo->do_fancy_upsampling = FALSE; + + } else if (keymatch(arg, "onepass", 3)) { + /* Use fast one-pass quantization. */ + cinfo->two_pass_quantize = FALSE; + + } else if (keymatch(arg, "os2", 3)) { + /* BMP output format (OS/2 flavor). */ + requested_fmt = FMT_OS2; + + } else if (keymatch(arg, "outfile", 4)) { + /* Set output file name. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + outfilename = argv[argn]; /* save it away for later use */ + + } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) { + /* PPM/PGM output format. */ + requested_fmt = FMT_PPM; + + } else if (keymatch(arg, "rle", 1)) { + /* RLE output format. 
*/ + requested_fmt = FMT_RLE; + + } else if (keymatch(arg, "scale", 1)) { + /* Scale the output image by a fraction M/N. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + if (sscanf(argv[argn], "%d/%d", + &cinfo->scale_num, &cinfo->scale_denom) != 2) + usage(); + + } else if (keymatch(arg, "targa", 1)) { + /* Targa output format. */ + requested_fmt = FMT_TARGA; + + } else { + usage(); /* bogus switch */ + } + } + + return argn; /* return index of next arg (file name) */ +} + + +/* + * Marker processor for COM and interesting APPn markers. + * This replaces the library's built-in processor, which just skips the marker. + * We want to print out the marker as text, to the extent possible. + * Note this code relies on a non-suspending data source. + */ + +LOCAL(unsigned int) +jpeg_getc (j_decompress_ptr cinfo) +/* Read next byte */ +{ + struct jpeg_source_mgr * datasrc = cinfo->src; + + if (datasrc->bytes_in_buffer == 0) { + if (! (*datasrc->fill_input_buffer) (cinfo)) + ERREXIT(cinfo, JERR_CANT_SUSPEND); + } + datasrc->bytes_in_buffer--; + return GETJOCTET(*datasrc->next_input_byte++); +} + + +METHODDEF(boolean) +print_text_marker (j_decompress_ptr cinfo) +{ + boolean traceit = (cinfo->err->trace_level >= 1); + INT32 length; + unsigned int ch; + unsigned int lastch = 0; + + length = jpeg_getc(cinfo) << 8; + length += jpeg_getc(cinfo); + length -= 2; /* discount the length word itself */ + + if (traceit) { + if (cinfo->unread_marker == JPEG_COM) + fprintf(stderr, "Comment, length %ld:\n", (long) length); + else /* assume it is an APPn otherwise */ + fprintf(stderr, "APP%d, length %ld:\n", + cinfo->unread_marker - JPEG_APP0, (long) length); + } + + while (--length >= 0) { + ch = jpeg_getc(cinfo); + if (traceit) { + /* Emit the character in a readable form. + * Nonprintables are converted to \nnn form, + * while \ is converted to \\. + * Newlines in CR, CR/LF, or LF form will be printed as one newline. 
+       */
+      if (ch == '\r') {
+        fprintf(stderr, "\n");
+      } else if (ch == '\n') {
+        if (lastch != '\r')
+          fprintf(stderr, "\n");
+      } else if (ch == '\\') {
+        fprintf(stderr, "\\\\");
+      } else if (isprint(ch)) {
+        putc(ch, stderr);
+      } else {
+        fprintf(stderr, "\\%03o", ch);
+      }
+      lastch = ch;
+    }
+  }
+
+  if (traceit)
+    fprintf(stderr, "\n");
+
+  return TRUE;
+}
+
+
+/*
+ * Check for overwrite of an existing file; clear it with user.
+ *
+ * The file is probed by trying to open it for reading.  If that fails the
+ * name is treated as free and TRUE is returned without prompting.
+ * Otherwise the user is asked on stderr and the first character of the
+ * reply line read from stdin decides:
+ *   'y' / 'Y'  -> TRUE  (go ahead and overwrite)
+ *   'n' / 'N'  -> FALSE (leave the file alone)
+ *   other      -> the question is repeated
+ * End-of-file on stdin before an answer is read returns FALSE, so a closed
+ * or exhausted stdin can neither hang the program nor overwrite a file.
+ */
+
+#ifndef NO_OVERWRITE_CHECK
+
+LOCAL(boolean)
+is_write_ok (char * outfname)
+{
+  FILE * ofile;
+  int ch;                       /* first character of the reply line */
+  int c;                        /* scratch while discarding the rest */
+
+  ofile = fopen(outfname, READ_BINARY);
+  if (ofile == NULL)
+    return TRUE;                /* not present */
+  fclose(ofile);                /* oops, it is present */
+
+  for (;;) {
+    fprintf(stderr, "%s already exists, overwrite it? [y/n] ",
+            outfname);
+    fflush(stderr);
+    ch = getc(stdin);
+    if (ch == EOF)
+      return FALSE;             /* no answer can arrive; do not overwrite */
+    if (ch != '\n') {           /* flush rest of line */
+      /* Stop at EOF as well as newline: getc() keeps returning EOF once
+       * the stream is exhausted, so testing for '\n' alone never ends.
+       */
+      do {
+        c = getc(stdin);
+      } while (c != '\n' && c != EOF);
+    }
+
+    switch (ch) {
+    case 'Y':
+    case 'y':
+      return TRUE;
+    case 'N':
+    case 'n':
+      return FALSE;
+      /* otherwise, ask again */
+    }
+  }
+}
+
+#endif
+
+
+/*
+ * Process a single input file name, and return its index in argv[].
+ * File names at or to left of old_file_index have been processed already.
+ */
+
+LOCAL(int)
+process_one_file (int argc, char **argv, int old_file_index)
+{
+  struct jpeg_decompress_struct cinfo;
+  struct jpeg_error_mgr jerr;
+  char *infilename;
+  char workfilename[PATH_MAX];
+  const char *default_extension = NULL;
+#ifdef PROGRESS_REPORT
+  struct cdjpeg_progress_mgr progress;
+#endif
+  int file_index;
+  djpeg_dest_ptr dest_mgr = NULL;
+  FILE * input_file = NULL;
+  FILE * output_file = NULL;
+  JDIMENSION num_scanlines;
+
+  /* Initialize the JPEG decompression object with default error handling.
*/ + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + /* Add some application-specific error messages (from cderror.h) */ + jerr.addon_message_table = cdjpeg_message_table; + jerr.first_addon_message = JMSG_FIRSTADDONCODE; + jerr.last_addon_message = JMSG_LASTADDONCODE; + + /* Insert custom marker processor for COM and APP12. + * APP12 is used by some digital camera makers for textual info, + * so we provide the ability to display it as text. + * If you like, additional APPn marker types can be selected for display, + * but don't try to override APP0 or APP14 this way (see libjpeg.doc). + */ + jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker); + jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker); + + /* Now safe to enable signal catcher. */ +#ifdef NEED_SIGNAL_CATCHER + enable_signal_catcher((j_common_ptr) &cinfo); +#endif + + /* Scan command line to find next file name. + * It is convenient to use just one switch-parsing routine, but the switch + * values read here are ignored; we will rescan the switches after opening + * the input file. + * (Exception: tracing level set here controls verbosity for COM markers + * found during jpeg_read_header...) + */ + + file_index = parse_switches(&cinfo, argc, argv, old_file_index, FALSE); + if (file_index >= argc) { + fprintf(stderr, "%s: missing input file name\n", progname); + usage(); + } + + /* Open the input file. 
*/ + infilename = argv[file_index]; + if ((input_file = fopen(infilename, READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, infilename); + goto fail; + } + +#ifdef PROGRESS_REPORT + start_progress_monitor((j_common_ptr) &cinfo, &progress); +#endif + + /* Specify data source for decompression */ + jpeg_stdio_src(&cinfo, input_file); + + /* Read file header, set default decompression parameters */ + (void) jpeg_read_header(&cinfo, TRUE); + + /* Adjust default decompression parameters by re-parsing the options */ + file_index = parse_switches(&cinfo, argc, argv, old_file_index, TRUE); + + /* Initialize the output module now to let it override any crucial + * option settings (for instance, GIF wants to force color quantization). + */ + switch (requested_fmt) { +#ifdef BMP_SUPPORTED + case FMT_BMP: + dest_mgr = jinit_write_bmp(&cinfo, FALSE); + default_extension = ".bmp"; + break; + case FMT_OS2: + dest_mgr = jinit_write_bmp(&cinfo, TRUE); + default_extension = ".bmp"; + break; +#endif +#ifdef GIF_SUPPORTED + case FMT_GIF: + dest_mgr = jinit_write_gif(&cinfo); + default_extension = ".gif"; + break; +#endif +#ifdef PPM_SUPPORTED + case FMT_PPM: + dest_mgr = jinit_write_ppm(&cinfo); + default_extension = ".ppm"; + break; +#endif +#ifdef RLE_SUPPORTED + case FMT_RLE: + dest_mgr = jinit_write_rle(&cinfo); + default_extension = ".rle"; + break; +#endif +#ifdef TARGA_SUPPORTED + case FMT_TARGA: + dest_mgr = jinit_write_targa(&cinfo); + default_extension = ".tga"; + break; +#endif + default: + ERREXIT(&cinfo, JERR_UNSUPPORTED_FORMAT); + break; + } + + /* If user didn't supply -outfile switch, select output file name. 
+   */
+  if (outfilename == NULL) {
+    int i;
+
+    /* workfilename is a fixed PATH_MAX buffer and infilename comes straight
+     * from the command line, so check the length before copying.  The test
+     * is conservative: it assumes no existing extension gets lopped off.
+     */
+    if (strlen(infilename) + strlen(default_extension)
+        >= sizeof(workfilename)) {
+      fprintf(stderr, "%s: file name too long: %s\n", progname, infilename);
+      goto fail;
+    }
+
+    outfilename = workfilename;
+    /* Make outfilename be infilename with appropriate extension */
+    strcpy(outfilename, infilename);
+    /* Scan backwards for the last '.' in the final path component; hitting
+     * a directory or drive separator first means there is no extension.
+     */
+    for (i = strlen(outfilename)-1; i >= 0; i--) {
+      switch (outfilename[i]) {
+      case ':':
+      case '/':
+      case '\\':
+        i = 0;                  /* stop scanning */
+        break;
+      case '.':
+        outfilename[i] = '\0';  /* lop off existing extension */
+        i = 0;                  /* stop scanning */
+        break;
+      default:
+        break;                  /* keep scanning */
+      }
+    }
+    strcat(outfilename, default_extension);
+  }
+
+  fprintf(stderr, "Decompressing %s => %s\n", infilename, outfilename);
+#ifndef NO_OVERWRITE_CHECK
+  if (! is_write_ok(outfilename))
+    goto fail;
+#endif
+
+  /* Open the output file. */
+  if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
+    fprintf(stderr, "%s: can't create %s\n", progname, outfilename);
+    goto fail;
+  }
+  dest_mgr->output_file = output_file;
+
+  /* Start decompressor */
+  (void) jpeg_start_decompress(&cinfo);
+
+  /* Write output file header */
+  (*dest_mgr->start_output) (&cinfo, dest_mgr);
+
+  /* Process data */
+  while (cinfo.output_scanline < cinfo.output_height) {
+    num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
+                                        dest_mgr->buffer_height);
+    (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+  }
+
+#ifdef PROGRESS_REPORT
+  /* Hack: count final pass as done in case finish_output does an extra pass.
+   * The library won't have updated completed_passes.
+   */
+  progress.pub.completed_passes = progress.pub.total_passes;
+#endif
+
+  /* Finish decompression and release memory.
+   * I must do it in this order because output module has allocated memory
+   * of lifespan JPOOL_IMAGE; it needs to finish before releasing memory.
+ */ + (*dest_mgr->finish_output) (&cinfo, dest_mgr); + (void) jpeg_finish_decompress(&cinfo); + + /* Clean up and exit */ +fail: + jpeg_destroy_decompress(&cinfo); + + if (input_file != NULL) fclose(input_file); + if (output_file != NULL) fclose(output_file); + +#ifdef PROGRESS_REPORT + end_progress_monitor((j_common_ptr) &cinfo); +#endif + + /* Disable signal catcher. */ +#ifdef NEED_SIGNAL_CATCHER + enable_signal_catcher((j_common_ptr) NULL); +#endif + + return file_index; +} + + +/* + * The main program. + */ + +int +main (int argc, char **argv) +{ + int file_index; + + /* On Mac, fetch a command line. */ +#ifdef USE_CCOMMAND + argc = ccommand(&argv); +#endif + +#ifdef MSDOS + progname = "djpeg"; /* DOS tends to be too verbose about argv[0] */ +#else + progname = argv[0]; + if (progname == NULL || progname[0] == 0) + progname = "djpeg"; /* in case C library doesn't provide it */ +#endif + + /* The default maxmem must be computed only once at program startup, + * since releasing memory with free() won't give it back to the OS. + */ +#ifdef FREE_MEM_ESTIMATE + default_maxmem = FREE_MEM_ESTIMATE; +#else + default_maxmem = 0; +#endif + + /* Scan command line, parse switches and locate input file names */ + + if (argc < 2) + usage(); /* nothing on the command line?? */ + + file_index = 0; + + while (file_index < argc-1) + file_index = process_one_file(argc, argv, file_index); + + /* All done. */ + exit(EXIT_SUCCESS); + return 0; /* suppress no-return-value warnings */ +} diff --git a/altui/usage.alt b/altui/usage.alt new file mode 100644 index 0000000..277332b --- /dev/null +++ b/altui/usage.alt @@ -0,0 +1,62 @@ +(Most of the standard usage.doc file also applies to this alternate version, +but replace its "GENERAL USAGE" section with the text below. Edit the text +as necessary if you don't support wildcards or overwrite checking. Be sure +to fix the djpeg switch descriptions if you are not defaulting to PPM output. 
+Also, if you've provided an accurate memory-estimation procedure, you can +probably eliminate the HINTS related to the -maxmemory switch.) + + +GENERAL USAGE + +We provide two programs, cjpeg to compress an image file into JPEG format, +and djpeg to decompress a JPEG file back into a conventional image format. + +The basic command line is: + cjpeg [switches] list of image files +or + djpeg [switches] list of jpeg files + +Each file named is compressed or decompressed. The input file(s) are not +modified; the output data is written to files which have the same names +except for extension. cjpeg always uses ".jpg" for the output file name's +extension; djpeg uses one of ".bmp", ".gif", ".ppm", ".rle", or ".tga", +depending on what output format is selected by the switches. + +For example, to convert xxx.bmp to xxx.jpg and yyy.ppm to yyy.jpg, say: + cjpeg xxx.bmp yyy.ppm + +On most systems you can use standard wildcards to specify the list of input +files; for example, on DOS "djpeg *.jpg" decompresses all the JPEG files in +the current directory. + +If an intended output file already exists, you'll be asked whether or not to +overwrite it. If you say no, the program skips that input file and goes on +to the next one. + +You can intermix switches and file names; for example + djpeg -gif file1.jpg -targa file2.jpg +decompresses file1.jpg into GIF format (file1.gif) and file2.jpg into Targa +format (file2.tga). Only switches to the left of a given file name affect +processing of that file; when there are conflicting switches, the rightmost +one takes precedence. + +You can override the program's choice of output file name by using the +-outfile switch, as in + cjpeg -outfile output.jpg input.ppm +-outfile only affects the first input file name to its right. + +The currently supported image file formats are: PPM (PBMPLUS color format), +PGM (PBMPLUS gray-scale format), BMP, GIF, Targa, and RLE (Utah Raster +Toolkit format). 
(RLE is supported only if the URT library is available, +which it isn't on most non-Unix systems.) cjpeg recognizes the input image +format automatically, with the exception of some Targa-format files. You +have to tell djpeg which format to generate. + +JPEG files are in the de facto standard JFIF file format. There are other, +less widely used JPEG-based file formats, but we don't support them. + +All switch names may be abbreviated; for example, -grayscale may be written +-gray or -gr. Most of the "basic" switches can be abbreviated to as little as +one letter. Upper and lower case are equivalent (-BMP is the same as -bmp). +British spellings are also accepted (e.g., -greyscale), though for brevity +these are not mentioned below. diff --git a/cjpeg.c b/cjpeg.c index f2a929f..10f5f5b 100644 --- a/cjpeg.c +++ b/cjpeg.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : August 23, 2005 + * --------------------------------------------------------------------- + * * This file contains a command-line user interface for the JPEG compressor. * It should work on any system with Unix- or MS-DOS-style command lines. * @@ -195,6 +202,22 @@ usage (void) } +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +LOCAL(void) +print_simd_info (FILE * file, char * labelstr, unsigned int simd) +{ + fprintf(file, "%s%s%s%s%s%s\n", labelstr, + simd & JSIMD_MMX ? " MMX" : "", + simd & JSIMD_3DNOW ? " 3DNow!" : "", + simd & JSIMD_SSE ? " SSE" : "", + simd & JSIMD_SSE2 ? " SSE2" : "", + simd == JSIMD_NONE ?
" NONE" : ""); +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ + + LOCAL(int) parse_switches (j_compress_ptr cinfo, int argc, char **argv, int last_file_arg_seen, boolean for_real) @@ -258,6 +281,19 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* Force baseline-compatible output (8-bit quantizer values). */ force_baseline = TRUE; +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED + } else if (keymatch(arg, "nosimd" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL); + } else if (keymatch(arg, "nommx" , 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX); + } else if (keymatch(arg, "no3dnow", 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW); + } else if (keymatch(arg, "nosse" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE); + } else if (keymatch(arg, "nosse2" , 6)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2); +#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */ + } else if (keymatch(arg, "dct", 2)) { /* Select DCT algorithm. */ if (++argn >= argc) /* advance to next argument */ @@ -279,6 +315,32 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, if (! 
printed_version) { fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n", JVERSION, JCOPYRIGHT); + fprintf(stderr, + "\nx86 SIMD extension for IJG JPEG library, version %s\n\n", + JPEG_SIMDEXT_VER_STR); +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + print_simd_info(stderr, "SIMD instructions supported by the system :", + jpeg_simd_support(NULL)); + + fprintf(stderr, "\n === SIMD Operation Modes ===\n"); +#ifdef DCT_ISLOW_SUPPORTED + print_simd_info(stderr, "Accurate integer DCT (-dct int) :", + jpeg_simd_forward_dct(cinfo, JDCT_ISLOW)); +#endif +#ifdef DCT_IFAST_SUPPORTED + print_simd_info(stderr, "Fast integer DCT (-dct fast) :", + jpeg_simd_forward_dct(cinfo, JDCT_IFAST)); +#endif +#ifdef DCT_FLOAT_SUPPORTED + print_simd_info(stderr, "Floating-point DCT (-dct float) :", + jpeg_simd_forward_dct(cinfo, JDCT_FLOAT)); +#endif + print_simd_info(stderr, "Downsampling (-sample 2x2 or 2x1) :", + jpeg_simd_downsampler(cinfo)); + print_simd_info(stderr, "Colorspace conversion (RGB->YCbCr) :", + jpeg_simd_color_converter(cinfo)); + fprintf(stderr, "\n"); +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ printed_version = TRUE; } cinfo->err->trace_level++; diff --git a/ckconfig.c b/ckconfig.c index 34baf79..ba380dc 100644 --- a/ckconfig.c +++ b/ckconfig.c @@ -4,6 +4,13 @@ * Copyright (C) 1991-1994, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. + * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. 
+ * Last Modified : March 28, 2005 + * --------------------------------------------------------------------- */ /* @@ -361,6 +368,10 @@ int main (argc, argv) fprintf(outfile, "#define INCOMPLETE_TYPES_BROKEN\n"); #else fprintf(outfile, "#undef INCOMPLETE_TYPES_BROKEN\n"); +#endif +#ifdef _WIN32 + fprintf(outfile, "\n/* Define \"boolean\" as unsigned char, not int, per Windows custom */\n"); + fprintf(outfile, "#define TYPEDEF_UCHAR_BOOLEAN\n"); #endif fprintf(outfile, "\n#ifdef JPEG_INTERNALS\n\n"); if (is_shifting_signed(-0x7F7E80B1L)) @@ -368,6 +379,14 @@ int main (argc, argv) else fprintf(outfile, "#define RIGHT_SHIFT_IS_UNSIGNED\n"); fprintf(outfile, "\n#endif /* JPEG_INTERNALS */\n"); + + fprintf(outfile, "\n#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS)\n"); + fprintf(outfile, "#undef JSIMD_MMX_NOT_SUPPORTED\n"); + fprintf(outfile, "#undef JSIMD_3DNOW_NOT_SUPPORTED\n"); + fprintf(outfile, "#undef JSIMD_SSE_NOT_SUPPORTED\n"); + fprintf(outfile, "#undef JSIMD_SSE2_NOT_SUPPORTED\n"); + fprintf(outfile, "#endif\n"); + fprintf(outfile, "\n#ifdef JPEG_CJPEG_DJPEG\n\n"); fprintf(outfile, "#define BMP_SUPPORTED /* BMP image file format */\n"); fprintf(outfile, "#define GIF_SUPPORTED /* GIF image file format */\n"); @@ -375,6 +394,9 @@ int main (argc, argv) fprintf(outfile, "#undef RLE_SUPPORTED /* Utah RLE image file format */\n"); fprintf(outfile, "#define TARGA_SUPPORTED /* Targa image file format */\n\n"); fprintf(outfile, "#undef TWO_FILE_COMMANDLINE /* You may need this on non-Unix systems */\n"); +#ifdef _WIN32 + fprintf(outfile, "#define USE_SETMODE /* Needed to make one-file style work */\n"); +#endif fprintf(outfile, "#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */\n"); fprintf(outfile, "#undef DONT_USE_B_MODE\n"); fprintf(outfile, "/* #define PROGRESS_REPORT */ /* optional */\n"); diff --git a/config.guess b/config.guess old mode 100755 new mode 100644 index 413ed41..fb25fa4 --- a/config.guess +++ b/config.guess @@ -1,7 +1,10
@@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright (C) 1992, 93, 94, 95, 96, 1997 Free Software Foundation, Inc. -# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + +timestamp='2006-01-30' + # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or @@ -14,154 +17,326 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. -# Written by Per Bothner . -# The master version of this file is at the FSF in /home/gd/gnu/lib. + +# Originally written by Per Bothner . +# Please send patches to . Submit a context +# diff and a properly formatted ChangeLog entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # # The plan is that this can be called by configure scripts if you -# don't specify an explicit system type (host/target name). -# -# Only a few systems have been added to this list; please add others -# (but try to keep the structure clean). -# +# don't specify an explicit build system type. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. 
+ +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' # This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 8/24/94.) +# (ghazi@noc.rutgers.edu 1994-08-24) if (test -f /.attbin/uname) >/dev/null 2>&1 ; then PATH=$PATH:/.attbin ; export PATH fi UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown -trap 'rm -f dummy.c dummy.o dummy; exit 1' 1 2 15 - # Note: order is significant - the case branches are not exclusive. case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. 
For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerppc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; alpha:OSF1:*:*) - if test $UNAME_RELEASE = "V4.0"; then + case $UNAME_RELEASE in + *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - fi + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. # A Vn.n version is a released version. 
# A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - cat <dummy.s - .globl main - .ent main -main: - .frame \$30,0,\$26,0 - .prologue 0 - .long 0x47e03d80 # implver $0 - lda \$2,259 - .long 0x47e20c21 # amask $2,$1 - srl \$1,8,\$2 - sll \$2,2,\$2 - sll \$0,3,\$0 - addl \$1,\$0,\$0 - addl \$2,\$0,\$0 - ret \$31,(\$26),1 - .end main -EOF - ${CC-cc} dummy.s -o dummy 2>/dev/null - if test "$?" = 0 ; then - ./dummy - case "$?" in - 7) - UNAME_MACHINE="alpha" - ;; - 15) - UNAME_MACHINE="alphaev5" - ;; - 14) - UNAME_MACHINE="alphaev56" - ;; - 10) - UNAME_MACHINE="alphapca56" - ;; - 16) - UNAME_MACHINE="alphaev6" - ;; - esac - fi - rm -f dummy.s dummy - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr [[A-Z]] [[a-z]]` - exit 0 ;; + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? 
+ echo alpha-pc-interix + exit ;; 21064:Windows_NT:50:3) echo alpha-dec-winnt3.5 - exit 0 ;; + exit ;; Amiga*:UNIX_System_V:4.0:*) - echo m68k-cbm-sysv4 - exit 0;; - amiga:NetBSD:*:*) - echo m68k-cbm-netbsd${UNAME_RELEASE} - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc64:OpenBSD:*:*) - echo mips64el-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hkmips:OpenBSD:*:*) - echo mips-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mips-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; - arm32:NetBSD:*:*) - echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; - SR2?01:HI-UX/MPP:*:*) + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) echo hppa1.1-hitachi-hiuxmpp - exit 0;; - Pyramid*:OSx*:*:*|MIS*:OSx*:*:*) + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
if test "`(/bin/universe) 2>/dev/null`" = att ; then echo pyramid-pyramid-sysv3 else echo pyramid-pyramid-bsd fi - exit 0 ;; - NILE:*:*:dcosx) + exit ;; + NILE*:*:*:dcosx) echo pyramid-pyramid-svr4 - exit 0 ;; + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; i86pc:SunOS:5.*:*) echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in Series*|S4*) @@ -170,12 +345,12 @@ EOF esac # Japanese Language versions have a version number like `4.1.3-JL'. 
echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; + exit ;; sun3*:SunOS:*:*) echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; + exit ;; sun*:*:4.2BSD:*) - UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) @@ -185,52 +360,63 @@ EOF echo sparc-sun-sunos${UNAME_RELEASE} ;; esac - exit 0 ;; + exit ;; aushp:SunOS:*:*) echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; - atari*:NetBSD:*:*) - echo m68k-atari-netbsd${UNAME_RELEASE} - exit 0 ;; - atari*:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sun3*:NetBSD:*:*) - echo m68k-sun-netbsd${UNAME_RELEASE} - exit 0 ;; - sun3*:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:NetBSD:*:*) - echo m68k-apple-netbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; + exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 - exit 0 ;; + exit ;; RISC*:ULTRIX:*:*) echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; + exit ;; VAX*:ULTRIX*:*:*) echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - 2020:CLIX:*:*) + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) echo clipper-intergraph-clix${UNAME_RELEASE} - exit 0 ;; + exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) - sed 's/^ //' << EOF >dummy.c - int main (argc, argv) int argc; char **argv; { + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); @@ -245,62 +431,83 @@ EOF exit (-1); } EOF - ${CC-cc} dummy.c -o dummy \ - && ./dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } echo 
mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; Night_Hawk:Power_UNIX:*:*) echo powerpc-harris-powerunix - exit 0 ;; + exit ;; m88k:CX/UX:7*:*) echo m88k-harris-cxux7 - exit 0 ;; + exit ;; m88k:*:4*:R4*) echo m88k-motorola-sysv4 - exit 0 ;; + exit ;; m88k:*:3*:R3*) echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 -o $UNAME_PROCESSOR = mc88110 ] ; then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx \ - -o ${TARGET_BINARY_INTERFACE}x = x ] ; then + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then echo m88k-dg-dgux${UNAME_RELEASE} - else + else echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} fi - else echo i586-dg-dgux${UNAME_RELEASE} - fi - exit 0 ;; + exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 - exit 0 ;; + exit ;; M88*:*:R3*:*) # Delta 88k system running SVR3 echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) echo m88k-tektronix-sysv3 - exit 0 ;; + exit ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) echo m68k-tektronix-bsd - exit 0 ;; + exit ;; *:IRIX*:*:*) echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; + exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i?86:AIX:*:*) + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) echo i386-ibm-aix - exit 0 ;; + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - sed 's/^ //' << EOF >dummy.c + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c #include main() @@ -311,17 +518,21 @@ EOF exit(0); } EOF - ${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - echo rs6000-ibm-aix3.2.5 + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then echo rs6000-ibm-aix3.2.4 else echo rs6000-ibm-aix3.2 fi - exit 0 ;; - *:AIX:*:4) - if /usr/sbin/lsattr -EHl proc0 | grep POWER >/dev/null 2>&1; then + exit ;; + *:AIX:*:[45]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc @@ -329,43 +540,120 @@ EOF if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else - IBM_REV=4.${UNAME_RELEASE} + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; + exit ;; *:AIX:*:*) echo rs6000-ibm-aix - exit 0 ;; + exit ;; ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 - exit 0 ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC NetBSD and + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 + exit ;; # report: romp-ibm BSD 4.3 
*:BOSX:*:*) echo rs6000-bull-bosx - exit 0 ;; + exit ;; DPX/2?00:B.O.S.:*:*) echo m68k-bull-sysv3 - exit 0 ;; + exit ;; 9000/[34]??:4.3bsd:1.*:*) echo m68k-hp-bsd - exit 0 ;; + exit ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) echo m68k-hp-bsd4.4 - exit 0 ;; - 9000/[3478]??:HP-UX:*:*) + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case "${UNAME_MACHINE}" in 9000/31? ) HP_ARCH=m68000 ;; 9000/[34]?? ) HP_ARCH=m68k ;; - 9000/7?? | 9000/8?[1679] ) HP_ARCH=hppa1.1 ;; - 9000/8?? ) HP_ARCH=hppa1.0 ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; esac - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval 
$set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep __LP64__ >/dev/null + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; 3050*:HI-UX:*:*) - sed 's/^ //' << EOF >dummy.c + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c #include int main () @@ -390,324 +678,467 @@ EOF exit (0); } EOF - ${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 - exit 0 ;; + exit ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd - exit 0 ;; + exit ;; 9000/8??:4.3bsd:*:*) echo hppa1.0-hp-bsd - exit 0 ;; + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf - exit 0 ;; + exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf - exit 0 ;; - i?86:OSF1:*:*) + exit ;; + i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then echo ${UNAME_MACHINE}-unknown-osf1mk else echo ${UNAME_MACHINE}-unknown-osf1 fi - exit 0 ;; + exit ;; parisc*:Lites*:*:*) echo hppa1.1-hp-lites - exit 0 ;; + exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd - exit 0 ;; + exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd 
- exit 0 ;; + exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd - exit 0 ;; + exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd - exit 0 ;; - CRAY*X-MP:*:*:*) - echo xmp-cray-unicos - exit 0 ;; + exit ;; CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} - exit 0 ;; + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; CRAY*[A-Z]90:*:*:*) echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ - exit 0 ;; + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} - exit 0 ;; - CRAY-2:*:*:*) - echo cray2-cray-unicos - exit 0 ;; - F300:UNIX_System_V:*:*) - FUJITSU_SYS=`uname -p | tr [A-Z] [a-z] | sed -e 's/\///'` + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; - F301:UNIX_System_V:*:*) - echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'` - exit 0 ;; - hp3[0-9][05]:NetBSD:*:*) - echo m68k-hp-netbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - i?86:BSD/386:*:* | *:BSD/OS:*:*) + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + 
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; *:FreeBSD:*:*) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; - *:NetBSD:*:*) - echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; - *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; i*:CYGWIN*:*) - echo i386-pc-cygwin32 - exit 0 ;; + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; i*:MINGW*:*) - echo i386-pc-mingw32 - exit 0 ;; + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + x86:Interix*:[345]*) + echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' + exit ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin32 - exit 0 ;; + echo powerpcle-unknown-cygwin + exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; *:GNU:*:*) + # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; - *:Linux:*:*) + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + arm*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips64 + #undef mips64el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || 
defined(MIPSEL) + CPU=mips64el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips64 + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) # The BFD linker knows what the default object file format is, so - # first see if it will tell us. 
- ld_help_string=`ld --help 2>&1` - ld_supported_emulations=`echo $ld_help_string \ - | sed -ne '/supported emulations:/!d + # first see if it will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d s/[ ][ ]*/ /g - s/.*supported emulations: *// + s/.*supported targets: *// s/ .*// p'` - case "$ld_supported_emulations" in - i?86linux) echo "${UNAME_MACHINE}-pc-linux-gnuaout" ; exit 0 ;; - i?86coff) echo "${UNAME_MACHINE}-pc-linux-gnucoff" ; exit 0 ;; - sparclinux) echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;; - m68klinux) echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;; - elf32ppc) echo "powerpc-unknown-linux-gnu" ; exit 0 ;; + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. + echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + exit ;; esac - - if test "${UNAME_MACHINE}" = "alpha" ; then - sed 's/^ //' <dummy.s - .globl main - .ent main - main: - .frame \$30,0,\$26,0 - .prologue 0 - .long 0x47e03d80 # implver $0 - lda \$2,259 - .long 0x47e20c21 # amask $2,$1 - srl \$1,8,\$2 - sll \$2,2,\$2 - sll \$0,3,\$0 - addl \$1,\$0,\$0 - addl \$2,\$0,\$0 - ret \$31,(\$26),1 - .end main -EOF - LIBC="" - ${CC-cc} dummy.s -o dummy 2>/dev/null - if test "$?" = 0 ; then - ./dummy - case "$?" in - 7) - UNAME_MACHINE="alpha" - ;; - 15) - UNAME_MACHINE="alphaev5" - ;; - 14) - UNAME_MACHINE="alphaev56" - ;; - 10) - UNAME_MACHINE="alphapca56" - ;; - 16) - UNAME_MACHINE="alphaev6" - ;; - esac - - objdump --private-headers dummy | \ - grep ld.so.1 > /dev/null - if test "$?" 
= 0 ; then - LIBC="libc1" - fi - fi - rm -f dummy.s dummy - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0 - elif test "${UNAME_MACHINE}" = "mips" ; then - cat >dummy.c </dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - else - # Either a pre-BFD a.out linker (linux-gnuoldld) - # or one that does not give us useful --help. - # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout. - # If ld does not provide *any* "supported emulations:" - # that means it is gnuoldld. - echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:" - test $? != 0 && echo "${UNAME_MACHINE}-pc-linux-gnuoldld" && exit 0 - - case "${UNAME_MACHINE}" in - i?86) - VENDOR=pc; - ;; - *) - VENDOR=unknown; - ;; - esac - # Determine whether the default compiler is a.out or elf - cat >dummy.c < -main(argc, argv) - int argc; - char *argv[]; -{ -#ifdef __ELF__ -# ifdef __GLIBC__ -# if __GLIBC__ >= 2 - printf ("%s-${VENDOR}-linux-gnu\n", argv[1]); -# else - printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); -# endif -# else - printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); -# endif -#else - printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]); -#endif - return 0; -} + # Determine whether the default compiler is a.out or elf + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + #ifdef __ELF__ + # ifdef __GLIBC__ + # if __GLIBC__ >= 2 + LIBC=gnu + # else + LIBC=gnulibc1 + # endif + # else + LIBC=gnulibc1 + # endif + #else + #if defined(__INTEL_COMPILER) || defined(__PGI) + LIBC=gnu + #else + LIBC=gnuaout + #endif + #endif + #ifdef __dietlibc__ + LIBC=dietlibc + #endif EOF - ${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0 - rm -f dummy.c dummy - fi ;; -# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. earlier versions -# are messed up and put the nodename in both sysname and nodename. 
- i?86:DYNIX/ptx:4*:*) + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^LIBC/{ + s: ::g + p + }'`" + test x"${LIBC}" != x && { + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit + } + test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. echo i386-sequent-sysv4 - exit 0 ;; - i?86:UNIX_SV:4.2MP:2.*) + exit ;; + i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; - i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*) + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_RELEASE} + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_RELEASE} + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} fi - exit 0 ;; - i?86:*:3.2:*) + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. 
+ case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')` - (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \ + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 echo ${UNAME_MACHINE}-pc-sco$UNAME_REL else echo ${UNAME_MACHINE}-pc-sysv32 fi - exit 0 ;; + exit ;; pc:*:*:*) + # Left here for compatibility: # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i386. echo i386-pc-msdosdjgpp - exit 0 ;; + exit ;; Intel:Mach:3*:*) echo i386-pc-mach3 - exit 0 ;; + exit ;; paragon:*:*:*) echo i860-intel-osf1 - exit 0 ;; + exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 fi - exit 0 ;; + exit ;; mini*:CTIX:SYS*5:*) # "miniframe" echo m68010-convergent-sysv - exit 0 ;; - M68*:*:R3V[567]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0) + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; - m68*:LynxOS:2.*:*) + && { echo i486-ncr-sysv4; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) echo m68k-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 - exit 0 ;; - i?86:LynxOS:2.*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; TSUNAMI:LynxOS:2.*:*) echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*) + exit ;; + rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; SM[BE]S:UNIX_SV:*:*) echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; + exit ;; + 
RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; RM*:SINIX-*:*:*) echo mips-sni-sysv4 - exit 0 ;; + exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` @@ -715,39 +1146,156 @@ EOF else echo ns32k-sni-sysv fi - exit 0 ;; - PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says echo i586-unisys-sysv4 - exit 0 ;; + exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm echo hppa1.1-stratus-sysv4 - exit 0 ;; + exit ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. echo i860-stratus-sysv4 - exit 0 ;; + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; mc68*:A/UX:*:*) echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; - news*:NEWS-OS:*:6*) + exit ;; + news*:NEWS-OS:6*:*) echo mips-sony-newsos6 - exit 0 ;; - R3000:*System_V*:*:* | R4000:UNIX_SYSV:*:*) + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then echo mips-nec-sysv${UNAME_RELEASE} else echo mips-unknown-sysv${UNAME_RELEASE} fi - exit 0 ;; + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
+ echo i586-pc-beos + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 #echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 -cat >dummy.c <$dummy.c < # include @@ -772,7 +1320,7 @@ main () #endif #if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); + printf ("arm-acorn-riscix\n"); exit (0); #endif #if defined (hp300) && !defined (hpux) @@ -785,7 +1333,10 @@ main () #endif int version; version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); exit (0); #endif @@ -830,11 +1381,24 @@ main () #endif #if defined (vax) -#if !defined (ultrix) - printf ("vax-dec-bsd\n"); exit (0); -#else 
- printf ("vax-dec-ultrix\n"); exit (0); -#endif +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif #endif #if defined (alliant) && defined (i860) @@ -845,12 +1409,12 @@ main () } EOF -${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy && rm dummy.c dummy && exit 0 -rm -f dummy.c dummy +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } # Convex versions that predate uname can use getsysinfo(1) @@ -859,25 +1423,69 @@ then case `getsysinfo -f cpu_type` in c1*) echo c1-convex-bsd - exit 0 ;; + exit ;; c2*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; c34*) echo c34-convex-bsd - exit 0 ;; + exit ;; c38*) echo c38-convex-bsd - exit 0 ;; + exit ;; c4*) echo c4-convex-bsd - exit 0 ;; + exit ;; esac fi -#echo '(Unable to guess system type)' 1>&2 +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.sub b/config.sub old mode 100755 new mode 100644 index 213a6d4..a4e8a94 --- a/config.sub +++ b/config.sub @@ -1,6 +1,10 @@ #! /bin/sh -# Configuration validation subroutine script, version 1.1. -# Copyright (C) 1991, 92, 93, 94, 95, 96, 1997 Free Software Foundation, Inc. +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + +timestamp='2006-01-02' + # This file is (in principle) common to ALL GNU software. # The presence of a machine in this file suggests that SOME GNU software # can handle that machine. It does not imply ALL GNU software can. @@ -17,14 +21,18 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. 
+# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. + +# Please send patches to . Submit a context +# diff and a properly formatted ChangeLog entry. +# # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. # If it is invalid, we print an error message on stderr and exit with code 1. @@ -45,30 +53,75 @@ # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. -if [ x$1 = x ] -then - echo Configuration name missing. 1>&2 - echo "Usage: $0 CPU-MFR-OPSYS" 1>&2 - echo "or $0 ALIAS" 1>&2 - echo where ALIAS is a recognized configuration type. 1>&2 - exit 1 -fi +me=`echo "$0" | sed -e 's,.*/,,'` -# First pass through any local machine types. -case $1 in - *local*) - echo $1 - exit 0 - ;; - *) - ;; +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." 
+ +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; esac # Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). # Here we must recognize all the valid KERNEL-OS combinations. maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in - linux-gnu*) + nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ + uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; @@ -94,15 +147,37 @@ case $os in -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple) + -apple | -axis | -knuth | -cray) os= basic_machine=$1 ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; -hiux*) os=-hiuxwe2 ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco5) - os=sco3.2v5 + os=-sco3.2v5 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco4) @@ -117,10 +192,17 @@ case $os in # Don't forget version if it is 3.2v4 or newer. 
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco*) os=-sco3.2v2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -isc) os=-isc2.2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` @@ -143,26 +225,84 @@ case $os in -psos*) os=-psos ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; esac # Decode aliases for certain CPU-COMPANY combinations. case $basic_machine in # Recognize the basic CPU types without company name. # Some are omitted here because they have special meanings below. - tahoe | i860 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \ - | arme[lb] | pyramid | mn10200 | mn10300 \ - | tron | a29k | 580 | i960 | h8300 | hppa | hppa1.0 | hppa1.1 \ - | alpha | alphaev5 | alphaev56 | we32k | ns16k | clipper \ - | i370 | sh | powerpc | powerpcle | 1750a | dsp16xx | pdp11 \ - | mips64 | mipsel | mips64el | mips64orion | mips64orionel \ - | mipstx39 | mipstx39el \ - | sparc | sparclet | sparclite | sparc64 | v850) + 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | m32r | m32rle | m68000 | m68k | m88k | maxq | mb | microblaze | mcore \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64vr | mips64vrel \ + | mips64orion | mips64orionel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | 
mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | mt \ + | msp430 \ + | ns16k | ns32k \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b \ + | strongarm \ + | tahoe | thumb | tic4x | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k) + basic_machine=$basic_machine-unknown + ;; + m32c) basic_machine=$basic_machine-unknown ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + # We use `pc' rather than `unknown' # because (1) that's what they normally are, and # (2) the word "unknown" tends to confuse beginning users. - i[3456]86) + i*86 | x86_64) basic_machine=$basic_machine-pc ;; # Object if more than one company name word. @@ -171,27 +311,91 @@ case $basic_machine in exit 1 ;; # Recognize the basic CPU types with company name. 
- vax-* | tahoe-* | i[3456]86-* | i860-* | m32r-* | m68k-* | m68000-* \ - | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \ - | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \ - | power-* | none-* | 580-* | cray2-* | h8300-* | i960-* \ - | xmp-* | ymp-* | hppa-* | hppa1.0-* | hppa1.1-* \ - | alpha-* | alphaev5-* | alphaev56-* | we32k-* | cydra-* \ - | ns16k-* | pn-* | np1-* | xps100-* | clipper-* | orion-* \ - | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \ - | sparc64-* | mips64-* | mipsel-* \ - | mips64el-* | mips64orion-* | mips64orionel-* \ - | mipstx39-* | mipstx39el-* \ - | f301-*) + 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* 
\ + | mmix-* \ + | mt-* \ + | msp430-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa-* \ + | ymp-* \ + | z8k-*) + ;; + m32c-*) ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. + 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) basic_machine=m68000-att ;; 3b*) basic_machine=we32k-att ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; alliant | fx80) basic_machine=fx80-alliant ;; @@ -202,25 +406,35 @@ case $basic_machine in basic_machine=a29k-none os=-bsd ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; amdahl) basic_machine=580-amdahl os=-sysv ;; amiga | amiga-*) - basic_machine=m68k-cbm + basic_machine=m68k-unknown ;; amigaos | amigados) - basic_machine=m68k-cbm + basic_machine=m68k-unknown os=-amigaos ;; amigaunix | amix) - basic_machine=m68k-cbm + basic_machine=m68k-unknown os=-sysv4 ;; apollo68) basic_machine=m68k-apollo os=-sysv ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; aux) basic_machine=m68k-apple os=-aux @@ -229,6 +443,10 @@ case $basic_machine 
in basic_machine=ns32k-sequent os=-dynix ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; convex-c1) basic_machine=c1-convex os=-bsd @@ -249,27 +467,45 @@ case $basic_machine in basic_machine=c38-convex os=-bsd ;; - cray | ymp) - basic_machine=ymp-cray + cray | j90) + basic_machine=j90-cray os=-unicos ;; - cray2) - basic_machine=cray2-cray - os=-unicos + craynv) + basic_machine=craynv-cray + os=-unicosmp ;; - [ctj]90-cray) - basic_machine=c90-cray - os=-unicos + cr16c) + basic_machine=cr16c-unknown + os=-elf ;; crds | unos) basic_machine=m68k-crds ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; da30 | da30-*) basic_machine=m68k-da30 ;; decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) basic_machine=mips-dec ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; delta | 3300 | motorola-3300 | motorola-delta \ | 3300-motorola | delta-motorola) basic_machine=m68k-motorola @@ -278,6 +514,10 @@ case $basic_machine in basic_machine=m88k-motorola os=-sysv3 ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx @@ -297,6 +537,10 @@ case $basic_machine in encore | umax | mmax) basic_machine=ns32k-encore ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; fx2800) basic_machine=i860-alliant ;; @@ -307,6 +551,10 @@ case $basic_machine in basic_machine=tron-gmicro os=-sysv ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; h3050r* | hiux*) basic_machine=hppa1.1-hitachi os=-hiuxwe2 @@ -315,6 +563,14 @@ case $basic_machine in basic_machine=h8300-hitachi os=-hms ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; harris) basic_machine=m88k-harris os=-sysv3 @@ -330,13 +586,30 @@ case 
$basic_machine in basic_machine=m68k-hp os=-hpux ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; hp9k2[0-9][0-9] | hp9k31[0-9]) basic_machine=m68000-hp ;; hp9k3[2-9][0-9]) basic_machine=m68k-hp ;; - hp9k7[0-9][0-9] | hp7[0-9][0-9] | hp9k8[0-9]7 | hp8[0-9]7) + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) basic_machine=hppa1.1-hp ;; hp9k8[0-9][0-9] | hp8[0-9][0-9]) @@ -345,27 +618,42 @@ case $basic_machine in hppa-next) os=-nextstep3 ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; i370-ibm* | ibm*) basic_machine=i370-ibm - os=-mvs ;; # I'm not sure what "Sysv32" means. Should this be sysv3.2? 
- i[3456]86v32) + i*86v32) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv32 ;; - i[3456]86v4*) + i*86v4*) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv4 ;; - i[3456]86v) + i*86v) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv ;; - i[3456]86sol2) + i*86sol2) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-solaris2 ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; iris | iris4d) basic_machine=mips-sgi case $os in @@ -391,16 +679,16 @@ case $basic_machine in basic_machine=ns32k-utek os=-sysv ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; miniframe) basic_machine=m68000-convergent ;; - mipsel*-linux*) - basic_machine=mipsel-unknown - os=-linux-gnu - ;; - mips*-linux*) - basic_machine=mips-unknown - os=-linux-gnu + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint ;; mips3*-*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` @@ -408,10 +696,37 @@ case $basic_machine in mips3*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; ncr3000) basic_machine=i486-ncr os=-sysv4 ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; news | news700 | news800 | news900) basic_machine=m68k-sony os=-newsos @@ -424,6 +739,10 @@ case $basic_machine in basic_machine=mips-sony os=-newsos ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; next | m*-next ) basic_machine=m68k-next case $os in @@ -449,9 +768,39 @@ case $basic_machine in basic_machine=i960-intel os=-nindy ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + 
basic_machine=mips-compaq + os=-nonstopux + ;; np1) basic_machine=np1-gould ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; pa-hitachi) basic_machine=hppa1.1-hitachi os=-hiuxwe2 @@ -466,53 +815,105 @@ case $basic_machine in pbb) basic_machine=m68k-tti ;; - pc532 | pc532-*) + pc532 | pc532-*) basic_machine=ns32k-pc532 ;; - pentium | p5) - basic_machine=i586-intel + pc98) + basic_machine=i386-pc ;; - pentiumpro | p6) - basic_machine=i686-intel + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - pentium-* | p5-*) + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - pentiumpro-* | p6-*) + pentiumpro-* | p6-* | 6x86-* | athlon-*) basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - k5) - # We don't have specific support for AMD's K5 yet, so just call it a Pentium - basic_machine=i586-amd + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - nexen) - # We don't have specific support for Nexgen yet, so just call it a Pentium - basic_machine=i586-nexgen + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pn) basic_machine=pn-gould ;; - power) basic_machine=rs6000-ibm + power) basic_machine=power-ibm ;; ppc) basic_machine=powerpc-unknown - ;; + ;; ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 
's/^[^-]*-//'` ;; ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown - ;; + ;; ppcle-* | powerpclittle-*) basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; ps2) basic_machine=i386-ibm ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; rm[46]00) basic_machine=mips-siemens ;; rtpc | rtpc-*) basic_machine=romp-ibm ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; sequent) basic_machine=i386-sequent ;; @@ -520,6 +921,13 @@ case $basic_machine in basic_machine=sh-hitachi os=-hms ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; sps7) basic_machine=m68k-bull os=-sysv2 @@ -527,6 +935,13 @@ case $basic_machine in spur) basic_machine=spur-unknown ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; sun2) basic_machine=m68000-sun ;; @@ -567,19 +982,51 @@ case $basic_machine in sun386 | sun386i | roadrunner) basic_machine=i386-sun ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; symmetry) basic_machine=i386-sequent os=-dynix ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tic55x | c55x*) + 
basic_machine=tic55x-unknown + os=-coff + ;; + tic6x | c6x*) + basic_machine=tic6x-unknown + os=-coff + ;; tx39) basic_machine=mipstx39-unknown ;; tx39el) basic_machine=mipstx39el-unknown ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; tower | tower-32) basic_machine=m68k-ncr ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; udi29k) basic_machine=a29k-amd os=-udi @@ -588,6 +1035,10 @@ case $basic_machine in basic_machine=a29k-nyu os=-sym1 ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; vaxv) basic_machine=vax-dec os=-sysv @@ -597,8 +1048,8 @@ case $basic_machine in os=-vms ;; vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; + basic_machine=f301-fujitsu + ;; vxworks960) basic_machine=i960-wrs os=-vxworks @@ -611,13 +1062,29 @@ case $basic_machine in basic_machine=a29k-wrs os=-vxworks ;; - xmp) - basic_machine=xmp-cray - os=-unicos + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf ;; - xps | xps100) + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) basic_machine=xps100-honeywell ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; none) basic_machine=none-none os=-none @@ -625,32 +1092,44 @@ case $basic_machine in # Here we handle the default manufacturer of certain CPU types. It is in # some cases the only manufacturer, in others, it is the most popular. 
- mips) - if [ x$os = x-linux-gnu ]; then - basic_machine=mips-unknown - else - basic_machine=mips-mips - fi + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki ;; romp) basic_machine=romp-ibm ;; + mmix) + basic_machine=mmix-knuth + ;; rs6000) basic_machine=rs6000-ibm ;; vax) basic_machine=vax-dec ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; pdp11) basic_machine=pdp11-dec ;; we32k) basic_machine=we32k-att ;; - sparc) + sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b) basic_machine=sparc-sun ;; - cydra) + cydra) basic_machine=cydra-cydrome ;; orion) @@ -659,6 +1138,15 @@ case $basic_machine in orion105) basic_machine=clipper-highlevel ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. 
+ ;; *) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 exit 1 @@ -711,14 +1199,49 @@ case $os in | -aos* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \ - | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -cygwin32* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -uxpv*) + | -chorusos* | -chorusrdb* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos*) # Remember, each alternative MUST END IN *, to match a version number. 
;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; -linux*) os=`echo $os | sed -e 's|linux|linux-gnu|'` ;; @@ -728,6 +1251,15 @@ case $os in -sunos6*) os=`echo $os | sed -e 's|sunos6|solaris3|'` ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; -osfrose*) os=-osfrose ;; @@ -743,11 +1275,26 @@ case $os in -acis*) os=-aos ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; -ctix* | -uts*) os=-sysv ;; + -nova*) + os=-rtmk-nova + ;; -ns2 ) - os=-nextstep2 + os=-nextstep2 + ;; + -nsk*) + os=-nsk ;; # Preserve the version number of sinix5. -sinix5.*) @@ -756,6 +1303,9 @@ case $os in -sinix*) os=-sysv4 ;; + -tpf*) + os=-tpf + ;; -triton*) os=-sysv3 ;; @@ -774,9 +1324,27 @@ case $os in # This must come after -sysvr4. -sysv*) ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; -xenix) os=-xenix ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; -none) ;; *) @@ -802,10 +1370,20 @@ case $basic_machine in *-acorn) os=-riscix1.2 ;; + arm*-rebel) + os=-linux + ;; arm*-semi) os=-aout ;; - pdp11-*) + c4x-* | tic4x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) os=-none ;; *-dec | vax-*) @@ -823,15 +1401,45 @@ case $basic_machine in # default. # os=-sunos4 ;; + m68*-cisco) + os=-aout + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; *-tti) # must be before sparc entry or we get the wrong os. 
os=-sysv3 ;; sparc-* | *-sun) os=-sunos4.1.1 ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; *-ibm) os=-aix ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; *-hp) os=-hpux ;; @@ -874,27 +1482,39 @@ case $basic_machine in *-next) os=-nextstep3 ;; - *-gould) + *-gould) os=-sysv ;; - *-highlevel) + *-highlevel) os=-bsd ;; *-encore) os=-bsd ;; - *-sgi) + *-sgi) os=-irix ;; - *-siemens) + *-siemens) os=-sysv4 ;; *-masscomp) os=-rtu ;; - f301-fujitsu) + f30[01]-fujitsu | f700-fujitsu) os=-uxpv ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; *) os=-none ;; @@ -916,9 +1536,15 @@ case $basic_machine in -aix*) vendor=ibm ;; + -beos*) + vendor=be + ;; -hpux*) vendor=hp ;; + -mpeix*) + vendor=hp + ;; -hiux*) vendor=hitachi ;; @@ -934,21 +1560,47 @@ case $basic_machine in -genix*) vendor=ns ;; - -mvs*) + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) vendor=ibm ;; -ptx*) vendor=sequent ;; - -vxsim* | -vxworks*) + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) vendor=wrs ;; -aux*) vendor=apple ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; esac basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` ;; esac echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.ver b/config.ver new file mode 100644 index 0000000..5b6b6c8 --- /dev/null +++ b/config.ver @@ -0,0 +1,44 @@ + +JPEG_VER_MAJOR=62 +JPEG_VER_MINOR=1 +JPEG_REVISION=0 + +case $host_os in + cygwin*) + # The shared library built from this source code is *not* binary + # compatible with the cygwin's official binary release (cygjpeg-62.dll). 
+ # This is because the official binary has been built with + # the lossless jpeg patch which is available as ljpeg-6b.tar.gz . + # Therefore we decided to give the shared library the version number + # other than 62. + # + JPEG_VER_MAJOR=162 + JPEG_VER_MINOR=0 + ;; + freebsd*) + # This follows the official binary release in the ports collection. + JPEG_VER_MAJOR=9 + ;; +esac + +# convert absolute version numbers to libtool ages +case $version_type in + freebsd-aout|freebsd-elf|sunos) + JPEG_LT_CURRENT=$JPEG_VER_MAJOR + JPEG_LT_REVISION=$JPEG_VER_MINOR + JPEG_LT_AGE=0 + ;; + irix|nonstopux) + JPEG_LT_CURRENT=`expr $JPEG_VER_MAJOR + $JPEG_VER_MINOR - 1` + JPEG_LT_AGE=$JPEG_VER_MINOR + JPEG_LT_REVISION=$JPEG_VER_MINOR + ;; + *) + JPEG_LT_CURRENT=`expr $JPEG_VER_MAJOR + $JPEG_VER_MINOR` + JPEG_LT_AGE=$JPEG_VER_MINOR + JPEG_LT_REVISION=$JPEG_REVISION + ;; +esac + +JPEG_LIB_VERSION=$JPEG_LT_CURRENT:$JPEG_LT_REVISION:$JPEG_LT_AGE + diff --git a/configure b/configure index 35c9db5..9c368e5 100755 --- a/configure +++ b/configure @@ -1,7 +1,7 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf version 2.12 +# Generated automatically using autoconf version 2.13 # Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. # # This configure script is free software; the Free Software Foundation @@ -12,13 +12,190 @@ ac_help= ac_default_prefix=/usr/local # Any additions from configure.in: ac_help="$ac_help - --enable-shared build shared library using GNU libtool" + --enable-shared[=PKGS] build shared libraries [default=no]" ac_help="$ac_help - --enable-static build static library using GNU libtool" + --enable-static[=PKGS] build static libraries [default=no]" +ac_help="$ac_help + --enable-fast-install[=PKGS] optimize for fast installation [default=yes]" +ac_help="$ac_help + --with-gnu-ld assume the C compiler uses GNU ld [default=no]" + +# Find the correct PATH separator. 
Usually this is `:', but +# DJGPP uses `;' like DOS. +if test "X${PATH_SEPARATOR+set}" != Xset; then + UNAME=${UNAME-`uname 2>/dev/null`} + case X$UNAME in + *-DOS) lt_cv_sys_path_separator=';' ;; + *) lt_cv_sys_path_separator=':' ;; + esac + PATH_SEPARATOR=$lt_cv_sys_path_separator +fi + + +# Check that we are running under the correct shell. +SHELL=${CONFIG_SHELL-/bin/sh} + +case X$ECHO in +X*--fallback-echo) + # Remove one level of quotation (which was required for Make). + ECHO=`echo "$ECHO" | sed 's,\\\\\$\\$0,'$0','` + ;; +esac + +echo=${ECHO-echo} +if test "X$1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift +elif test "X$1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then + # Yippee, $echo works! + : +else + # Restart under the correct shell. + exec $SHELL "$0" --no-reexec ${1+"$@"} +fi + +if test "X$1" = X--fallback-echo; then + # used as fallback echo + shift + cat </dev/null && + echo_test_string="`eval $cmd`" && + (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null + then + break + fi + done +fi + +if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + : +else + # The Solaris, AIX, and Digital Unix default echo programs unquote + # backslashes. This makes it impossible to quote backslashes using + # echo "$something" | sed 's/\\/\\\\/g' + # + # So, first we look for a working echo in the user's PATH. 
+ + IFS="${IFS= }"; save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for dir in $PATH /usr/ucb; do + if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && + test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$dir/echo" + break + fi + done + IFS="$save_ifs" + + if test "X$echo" = Xecho; then + # We didn't find a better echo, so look for alternatives. + if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # This shell has a builtin print -r that does the trick. + echo='print -r' + elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && + test "X$CONFIG_SHELL" != X/bin/ksh; then + # If we have ksh, try running configure again with it. + ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh} + export ORIGINAL_CONFIG_SHELL + CONFIG_SHELL=/bin/ksh + export CONFIG_SHELL + exec $CONFIG_SHELL "$0" --no-reexec ${1+"$@"} + else + # Try using printf. 
+ echo='printf %s\n' + if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # Cool, printf works + : + elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "$0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`($ORIGINAL_CONFIG_SHELL "$0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL + export CONFIG_SHELL + SHELL="$CONFIG_SHELL" + export SHELL + echo="$CONFIG_SHELL $0 --fallback-echo" + elif echo_testing_string=`($CONFIG_SHELL "$0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`($CONFIG_SHELL "$0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$CONFIG_SHELL $0 --fallback-echo" + else + # maybe with a smaller string... + prev=: + + for cmd in 'echo test' 'sed 2q "$0"' 'sed 10q "$0"' 'sed 20q "$0"' 'sed 50q "$0"'; do + if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null + then + break + fi + prev="$cmd" + done + + if test "$prev" != 'sed 50q "$0"'; then + echo_test_string=`eval $prev` + export echo_test_string + exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "$0" ${1+"$@"} + else + # Oops. We lost completely, so just stick with echo. + echo=echo + fi + fi + fi + fi +fi +fi + +# Copy echo and quote the copy suitably for passing to libtool from +# the Makefile, instead of quoting the original, which is used later. 
+ECHO=$echo +if test "X$ECHO" = "X$CONFIG_SHELL $0 --fallback-echo"; then + ECHO="$CONFIG_SHELL \\\$\$0 --fallback-echo" +fi + + +ac_help="$ac_help + --disable-libtool-lock avoid locking (might break parallel builds)" +ac_help="$ac_help + --with-pic try to use only PIC/non-PIC objects [default=use both]" ac_help="$ac_help --enable-maxmem[=N] enable use of temp files, set max mem usage to N MB" ac_help="$ac_help " +ac_help="$ac_help + --disable-mmx do not use MMX instruction set" +ac_help="$ac_help + --disable-3dnow do not use 3DNow! instruction set" +ac_help="$ac_help + --disable-sse do not use SSE instruction set" +ac_help="$ac_help + --disable-sse2 do not use SSE2 instruction set" +ac_help="$ac_help + --enable-uchar-boolean define type \"boolean\" as unsigned char (for Windows)" # Initialize some variables set by options. # The variables have the same names as the options, with @@ -57,6 +234,7 @@ mandir='${prefix}/man' # Initialize some other variables. subdirs= MFLAGS= MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} # Maximum number of lines to put in a shell here document. ac_max_here_lines=12 @@ -340,7 +518,7 @@ EOF verbose=yes ;; -version | --version | --versio | --versi | --vers) - echo "configure generated by autoconf version 2.12" + echo "configure generated by autoconf version 2.13" exit 0 ;; -with-* | --with-*) @@ -386,17 +564,6 @@ EOF -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } ;; - *=*) - varname=`echo "$ac_option"|sed -e 's/=.*//'` - # Reject names that aren't valid shell variable names. 
- if test -n "`echo $varname| sed 's/[a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $varname: invalid shell variable name" 1>&2; exit 1; } - fi - val="`echo "$ac_option"|sed 's/[^=]*=//'`" - test -n "$verbose" && echo " setting shell variable $varname to $val" - eval "$varname='$val'" - eval "export $varname" ;; - *) if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then echo "configure: warning: $ac_option: invalid host type" 1>&2 @@ -509,14 +676,23 @@ for ac_site_file in $CONFIG_SITE; do fi done +if test -r "$cache_file"; then + echo "loading cache $cache_file" + . $cache_file +else + echo "creating cache $cache_file" + > $cache_file +fi ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross +ac_exeext= +ac_objext=o if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then @@ -534,15 +710,16 @@ fi # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:538: checking for $ac_word" >&5 +echo "configure:714: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. 
if test -f $ac_dir/$ac_word; then ac_cv_prog_CC="gcc" @@ -563,16 +740,17 @@ if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:567: checking for $ac_word" >&5 +echo "configure:744: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_prog_rejected=no - for ac_dir in $PATH; do + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then @@ -607,25 +785,61 @@ else echo "$ac_t""no" 1>&6 fi + if test -z "$CC"; then + case "`uname -s`" in + *win32* | *WIN32*) + # Extract the first word of "cl", so it can be a program name with args. +set dummy cl; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:795: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_CC="cl" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +CC="$ac_cv_prog_CC" +if test -n "$CC"; then + echo "$ac_t""$CC" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + ;; + esac + fi test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... 
$ac_c" 1>&6 -echo "configure:615: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 +echo "configure:827: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross -cat > conftest.$ac_ext < conftest.$ac_ext << EOF + +#line 838 "configure" #include "confdefs.h" + main(){return(0);} EOF -if { (eval echo configure:629: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:843: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. if (./conftest; exit) 2>/dev/null; then @@ -639,18 +853,24 @@ else ac_cv_prog_cc_works=no fi rm -fr conftest* +ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 if test $ac_cv_prog_cc_works = no; then { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... 
$ac_c" 1>&6 -echo "configure:649: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 +echo "configure:869: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:654: checking whether we are using GNU C" >&5 +echo "configure:874: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -659,7 +879,7 @@ else yes; #endif EOF -if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:663: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then +if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:883: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else ac_cv_prog_gcc=no @@ -670,14 +890,47 @@ echo "$ac_t""$ac_cv_prog_gcc" 1>&6 if test $ac_cv_prog_gcc = yes; then GCC=yes - test "${CFLAGS+set}" = set || CFLAGS="-O2" else GCC= - test "${CFLAGS+set}" = set || CFLAGS="-O" +fi + +ac_test_CFLAGS="${CFLAGS+set}" +ac_save_CFLAGS="$CFLAGS" +CFLAGS= +echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6 +echo "configure:902: checking whether ${CC-cc} accepts -g" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + echo 'void f(){}' > conftest.c +if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then + ac_cv_prog_cc_g=yes +else + ac_cv_prog_cc_g=no +fi +rm -f conftest* + +fi + +echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS="$ac_save_CFLAGS" +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi fi echo $ac_n "checking how to run the C preprocessor""... 
$ac_c" 1>&6 -echo "configure:681: checking how to run the C preprocessor" >&5 +echo "configure:934: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= @@ -692,14 +945,14 @@ else # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:702: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out` +{ (eval echo configure:955: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else @@ -709,14 +962,31 @@ else rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < +Syntax Error +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:972: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + CPP="${CC-cc} -nologo -E" + cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:719: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out` +{ (eval echo configure:989: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else @@ -728,6 +998,8 @@ else fi rm -f conftest* fi +rm -f conftest* +fi rm -f conftest* ac_cv_prog_CPP="$CPP" fi @@ -738,12 +1010,12 @@ fi echo "$ac_t""$CPP" 1>&6 echo $ac_n "checking for function prototypes""... 
$ac_c" 1>&6 -echo "configure:742: checking for function prototypes" >&5 +echo "configure:1014: checking for function prototypes" >&5 if eval "test \"`echo '$''{'ijg_cv_have_prototypes'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1037: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ijg_cv_have_prototypes=yes else @@ -788,18 +1060,18 @@ else fi ac_safe=`echo "stddef.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for stddef.h""... $ac_c" 1>&6 -echo "configure:792: checking for stddef.h" >&5 +echo "configure:1064: checking for stddef.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:802: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out` +{ (eval echo configure:1074: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* eval "ac_cv_header_$ac_safe=yes" @@ -824,18 +1096,18 @@ fi ac_safe=`echo "stdlib.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for stdlib.h""... 
$ac_c" 1>&6 -echo "configure:828: checking for stdlib.h" >&5 +echo "configure:1100: checking for stdlib.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:838: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out` +{ (eval echo configure:1110: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* eval "ac_cv_header_$ac_safe=yes" @@ -860,18 +1132,18 @@ fi ac_safe=`echo "string.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for string.h""... $ac_c" 1>&6 -echo "configure:864: checking for string.h" >&5 +echo "configure:1136: checking for string.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:874: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out` +{ (eval echo configure:1146: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* eval "ac_cv_header_$ac_safe=yes" @@ -896,9 +1168,9 @@ EOF fi echo $ac_n "checking for size_t""... $ac_c" 1>&6 -echo "configure:900: checking for size_t" >&5 +echo "configure:1172: checking for size_t" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1195: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ijg_size_t_ok=yes else @@ -933,18 +1205,18 @@ echo "$ac_t""$ijg_size_t_ok" 1>&6 if test "$ijg_size_t_ok" != yes; then ac_safe=`echo "sys/types.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for sys/types.h""... 
$ac_c" 1>&6 -echo "configure:937: checking for sys/types.h" >&5 +echo "configure:1209: checking for sys/types.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:947: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out` +{ (eval echo configure:1219: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* eval "ac_cv_header_$ac_safe=yes" @@ -964,7 +1236,7 @@ if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then EOF cat > conftest.$ac_ext < EOF @@ -990,16 +1262,16 @@ if test "$ijg_size_t_ok" = no; then fi fi echo $ac_n "checking for type unsigned char""... $ac_c" 1>&6 -echo "configure:994: checking for type unsigned char" >&5 +echo "configure:1266: checking for type unsigned char" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1275: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* echo "$ac_t""yes" 1>&6 cat >> confdefs.h <<\EOF @@ -1014,16 +1286,16 @@ else fi rm -f conftest* echo $ac_n "checking for type unsigned short""... $ac_c" 1>&6 -echo "configure:1018: checking for type unsigned short" >&5 +echo "configure:1290: checking for type unsigned short" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1299: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* echo "$ac_t""yes" 1>&6 cat >> confdefs.h <<\EOF @@ -1038,9 +1310,9 @@ else fi rm -f conftest* echo $ac_n "checking for type void""... 
$ac_c" 1>&6 -echo "configure:1042: checking for type void" >&5 +echo "configure:1314: checking for type void" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1344: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* echo "$ac_t""yes" 1>&6 else @@ -1084,12 +1356,12 @@ fi rm -f conftest* echo $ac_n "checking for working const""... $ac_c" 1>&6 -echo "configure:1088: checking for working const" >&5 +echo "configure:1360: checking for working const" >&5 if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1414: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_c_const=yes else @@ -1159,10 +1431,10 @@ EOF fi echo $ac_n "checking for inline""... $ac_c" 1>&6 -echo "configure:1163: checking for inline" >&5 +echo "configure:1435: checking for inline" >&5 ijg_cv_inline="" cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1446: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ijg_cv_inline="__inline__" else @@ -1178,7 +1450,7 @@ else cat conftest.$ac_ext >&5 rm -rf conftest* cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1462: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ijg_cv_inline="__inline" else @@ -1194,7 +1466,7 @@ else cat conftest.$ac_ext >&5 rm -rf conftest* cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1478: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ijg_cv_inline="inline" else @@ -1220,16 +1492,16 @@ cat >> confdefs.h <&6 -echo "configure:1224: checking for broken incomplete types" >&5 +echo "configure:1496: checking for broken incomplete types" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo 
configure:1505: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* echo "$ac_t""ok" 1>&6 else @@ -1244,9 +1516,9 @@ EOF fi rm -f conftest* echo $ac_n "checking for short external names""... $ac_c" 1>&6 -echo "configure:1248: checking for short external names" >&5 +echo "configure:1520: checking for short external names" >&5 cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1532: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* echo "$ac_t""ok" 1>&6 else @@ -1271,14 +1543,14 @@ EOF fi rm -f conftest* echo $ac_n "checking to see if char is signed""... $ac_c" 1>&6 -echo "configure:1275: checking to see if char is signed" >&5 +echo "configure:1547: checking to see if char is signed" >&5 if test "$cross_compiling" = yes; then echo Assuming that char is signed on target machine. echo If it is unsigned, this will be a little bit inefficient. else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1578: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then echo "$ac_t""no" 1>&6 cat >> confdefs.h <<\EOF @@ -1319,12 +1591,12 @@ rm -fr conftest* fi echo $ac_n "checking to see if right shift is signed""... $ac_c" 1>&6 -echo "configure:1323: checking to see if right shift is signed" >&5 +echo "configure:1595: checking to see if right shift is signed" >&5 if test "$cross_compiling" = yes; then echo "$ac_t""Assuming that right shift is signed on target machine." 
1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1630: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then echo "$ac_t""no" 1>&6 cat >> confdefs.h <<\EOF @@ -1371,12 +1643,12 @@ rm -fr conftest* fi echo $ac_n "checking to see if fopen accepts b spec""... $ac_c" 1>&6 -echo "configure:1375: checking to see if fopen accepts b spec" >&5 +echo "configure:1647: checking to see if fopen accepts b spec" >&5 if test "$cross_compiling" = yes; then echo "$ac_t""Assuming that it does." 1>&6 else cat > conftest.$ac_ext < @@ -1386,7 +1658,7 @@ main() { exit(1); } EOF -if { (eval echo configure:1390: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1662: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then echo "$ac_t""yes" 1>&6 else @@ -1428,28 +1700,30 @@ ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 -echo "configure:1436: checking for a BSD compatible install" >&5 +echo "configure:1709: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else - IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS="${IFS}:" + IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" for ac_dir in $PATH; do # Account for people who put trailing slashes in PATH elements. 
case "$ac_dir/" in /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. - for ac_prog in ginstall installbsd scoinst install; do + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do if test -f $ac_dir/$ac_prog; then if test $ac_prog = install && grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. - # OSF/1 installbsd also uses dspmsg, but is usable. : else ac_cv_path_install="$ac_dir/$ac_prog -c" @@ -1479,20 +1753,23 @@ echo "$ac_t""$INSTALL" 1>&6 # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' + test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1488: checking for $ac_word" >&5 +echo "configure:1764: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_prog_RANLIB="ranlib" @@ -1511,30 +1788,186 @@ else fi + +# Make sure we can run config.sub. +if ${CONFIG_SHELL-/bin/sh} $ac_config_sub sun4 >/dev/null 2>&1; then : +else { echo "configure: error: can not run $ac_config_sub" 1>&2; exit 1; } +fi + +echo $ac_n "checking host system type""... 
$ac_c" 1>&6 +echo "configure:1799: checking host system type" >&5 + +host_alias=$host +case "$host_alias" in +NONE) + case $nonopt in + NONE) + if host_alias=`${CONFIG_SHELL-/bin/sh} $ac_config_guess`; then : + else { echo "configure: error: can not guess host type; you must specify one" 1>&2; exit 1; } + fi ;; + *) host_alias=$nonopt ;; + esac ;; +esac + +host=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $host_alias` +host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` +echo "$ac_t""$host" 1>&6 + +echo $ac_n "checking for Cygwin environment""... $ac_c" 1>&6 +echo "configure:1820: checking for Cygwin environment" >&5 +if eval "test \"`echo '$''{'ac_cv_cygwin'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_cygwin=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_cygwin=no +fi +rm -f conftest* +rm -f conftest* +fi + +echo "$ac_t""$ac_cv_cygwin" 1>&6 +CYGWIN= +test "$ac_cv_cygwin" = yes && CYGWIN=yes +echo $ac_n "checking for mingw32 environment""... $ac_c" 1>&6 +echo "configure:1853: checking for mingw32 environment" >&5 +if eval "test \"`echo '$''{'ac_cv_mingw32'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_mingw32=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_mingw32=no +fi +rm -f conftest* +rm -f conftest* +fi + +echo "$ac_t""$ac_cv_mingw32" 1>&6 +MINGW32= +test "$ac_cv_mingw32" = yes && MINGW32=yes + + +echo $ac_n "checking for executable suffix""... 
$ac_c" 1>&6 +echo "configure:1884: checking for executable suffix" >&5 +if eval "test \"`echo '$''{'ac_cv_exeext'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$CYGWIN" = yes || test "$MINGW32" = yes; then + ac_cv_exeext=.exe +else + rm -f conftest* + echo 'int main () { return 0; }' > conftest.$ac_ext + ac_cv_exeext= + if { (eval echo configure:1894: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then + for file in conftest.*; do + case $file in + *.c | *.o | *.obj) ;; + *) ac_cv_exeext=`echo $file | sed -e s/conftest//` ;; + esac + done + else + { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; } + fi + rm -f conftest* + test x"${ac_cv_exeext}" = x && ac_cv_exeext=no +fi +fi + +EXEEXT="" +test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext} +echo "$ac_t""${ac_cv_exeext}" 1>&6 +ac_exeext=$EXEEXT + + # Decide whether to use libtool, # and if so whether to build shared, static, or both flavors of library. -LTSHARED="no" # Check whether --enable-shared or --disable-shared was given. if test "${enable_shared+set}" = set; then enableval="$enable_shared" - LTSHARED="$enableval" + p=${PACKAGE-default} +case $enableval in +yes) enable_shared=yes ;; +no) enable_shared=no ;; +*) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac +else + enable_shared=no fi -LTSTATIC="no" # Check whether --enable-static or --disable-static was given. if test "${enable_static+set}" = set; then enableval="$enable_static" - LTSTATIC="$enableval" + p=${PACKAGE-default} +case $enableval in +yes) enable_static=yes ;; +no) enable_static=no ;; +*) + enable_static=no + # Look at the argument we got. We use all the common list separators. 
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac +else + enable_static=no fi -if test "x$LTSHARED" != xno -o "x$LTSTATIC" != xno; then +if test "x$enable_shared" != xno -o "x$enable_static" != xno; then USELIBTOOL="yes" - LIBTOOL="./libtool" +# LIBTOOL="./libtool" O="lo" A="la" LN='$(LIBTOOL) --mode=link $(CC)' INSTALL_LIB='$(LIBTOOL) --mode=install ${INSTALL}' INSTALL_PROGRAM="\$(LIBTOOL) --mode=install $INSTALL_PROGRAM" + UNINSTALL='$(LIBTOOL) --mode=uninstall $(RM)' else USELIBTOOL="no" LIBTOOL="" @@ -1542,6 +1975,7 @@ else A="a" LN='$(CC)' INSTALL_LIB="$INSTALL_DATA" + UNINSTALL='$(RM)' fi @@ -1549,83 +1983,4330 @@ fi + # Configure libtool if needed. if test $USELIBTOOL = yes; then - disable_shared= - disable_static= - if test "x$LTSHARED" = xno; then - disable_shared="--disable-shared" - fi - if test "x$LTSTATIC" = xno; then - disable_static="--disable-static" - fi - $srcdir/ltconfig $disable_shared $disable_static $srcdir/ltmain.sh + + + # Find the correct PATH separator. Usually this is `:', but +# DJGPP uses `;' like DOS. +if test "X${PATH_SEPARATOR+set}" != Xset; then + UNAME=${UNAME-`uname 2>/dev/null`} + case X$UNAME in + *-DOS) lt_cv_sys_path_separator=';' ;; + *) lt_cv_sys_path_separator=':' ;; + esac + PATH_SEPARATOR=$lt_cv_sys_path_separator fi -# Select memory manager depending on user input. -# If no "-enable-maxmem", use jmemnobs -MEMORYMGR='jmemnobs.$(O)' -MAXMEM="no" -# Check whether --enable-maxmem or --disable-maxmem was given. -if test "${enable_maxmem+set}" = set; then - enableval="$enable_maxmem" - MAXMEM="$enableval" +# Check whether --enable-fast-install or --disable-fast-install was given. 
+if test "${enable_fast_install+set}" = set; then + enableval="$enable_fast_install" + p=${PACKAGE-default} +case $enableval in +yes) enable_fast_install=yes ;; +no) enable_fast_install=no ;; +*) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac +else + enable_fast_install=yes fi -# support --with-maxmem for backwards compatibility with IJG V5. -# Check whether --with-maxmem or --without-maxmem was given. -if test "${with_maxmem+set}" = set; then - withval="$with_maxmem" - MAXMEM="$withval" +echo $ac_n "checking build system type""... $ac_c" 1>&6 +echo "configure:2027: checking build system type" >&5 + +build_alias=$build +case "$build_alias" in +NONE) + case $nonopt in + NONE) build_alias=$host_alias ;; + *) build_alias=$nonopt ;; + esac ;; +esac + +build=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $build_alias` +build_cpu=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +build_vendor=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +build_os=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` +echo "$ac_t""$build" 1>&6 + +# Check whether --with-gnu-ld or --without-gnu-ld was given. +if test "${with_gnu_ld+set}" = set; then + withval="$with_gnu_ld" + test "$withval" = no || with_gnu_ld=yes +else + with_gnu_ld=no fi -if test "x$MAXMEM" = xyes; then - MAXMEM=1 +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + echo $ac_n "checking for ld used by GCC""... $ac_c" 1>&6 +echo "configure:2056: checking for ld used by GCC" >&5 + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. 
+ [\\/]* | [A-Za-z]:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the path of ld + ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` + while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do + ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + echo $ac_n "checking for GNU ld""... $ac_c" 1>&6 +echo "configure:2086: checking for GNU ld" >&5 +else + echo $ac_n "checking for non-GNU ld""... $ac_c" 1>&6 +echo "configure:2089: checking for non-GNU ld" >&5 +fi +if eval "test \"`echo '$''{'lt_cv_path_LD'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -z "$LD"; then + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some GNU ld's only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + if "$lt_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then + test "$with_gnu_ld" != no && break + else + test "$with_gnu_ld" != yes && break + fi + fi + done + IFS="$ac_save_ifs" +else + lt_cv_path_LD="$LD" # Let the user override the test with a path. 
+fi fi -if test "x$MAXMEM" != xno; then - if test -n "`echo $MAXMEM | sed 's/[0-9]//g'`"; then - { echo "configure: error: non-numeric argument to --enable-maxmem" 1>&2; exit 1; } - fi - DEFAULTMAXMEM=`expr $MAXMEM \* 1048576` -cat >> confdefs.h <&6 -echo "configure:1596: checking for 'tmpfile()'" >&5 -cat > conftest.$ac_ext < -int main() { - FILE * tfile = tmpfile(); -; return 0; } -EOF -if { (eval echo configure:1605: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then - rm -rf conftest* - echo "$ac_t""yes" 1>&6 -MEMORYMGR='jmemansi.$(O)' +LD="$lt_cv_path_LD" +if test -n "$LD"; then + echo "$ac_t""$LD" 1>&6 else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* echo "$ac_t""no" 1>&6 -MEMORYMGR='jmemname.$(O)' -cat >> confdefs.h <<\EOF -#define NEED_SIGNAL_CATCHER -EOF +fi +test -z "$LD" && { echo "configure: error: no acceptable ld found in \$PATH" 1>&2; exit 1; } +echo $ac_n "checking if the linker ($LD) is GNU ld""... $ac_c" 1>&6 +echo "configure:2124: checking if the linker ($LD) is GNU ld" >&5 +if eval "test \"`echo '$''{'lt_cv_prog_gnu_ld'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + # I'd rather use --version here, but apparently some GNU ld's only accept -v. +if $LD -v 2>&1 &5; then + lt_cv_prog_gnu_ld=yes +else + lt_cv_prog_gnu_ld=no +fi +fi -echo $ac_n "checking for 'mktemp()'""... $ac_c" 1>&6 -echo "configure:1620: checking for 'mktemp()'" >&5 -cat > conftest.$ac_ext <&6 +with_gnu_ld=$lt_cv_prog_gnu_ld + + +echo $ac_n "checking for $LD option to reload object files""... 
$ac_c" 1>&6 +echo "configure:2141: checking for $LD option to reload object files" >&5 +if eval "test \"`echo '$''{'lt_cv_ld_reload_flag'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + lt_cv_ld_reload_flag='-r' +fi + +echo "$ac_t""$lt_cv_ld_reload_flag" 1>&6 +reload_flag=$lt_cv_ld_reload_flag +test -n "$reload_flag" && reload_flag=" $reload_flag" + +echo $ac_n "checking for BSD-compatible nm""... $ac_c" 1>&6 +echo "configure:2153: checking for BSD-compatible nm" >&5 +if eval "test \"`echo '$''{'lt_cv_path_NM'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM="$NM" +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/${ac_tool_prefix}nm + if test -f $tmp_nm || test -f $tmp_nm$ac_exeext ; then + # Check to see if the nm accepts a BSD-compat flag. + # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + if ($tmp_nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep '(/dev/null|Invalid file or object type)' >/dev/null; then + lt_cv_path_NM="$tmp_nm -B" + break + elif ($tmp_nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then + lt_cv_path_NM="$tmp_nm -p" + break + else + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + fi + fi + done + IFS="$ac_save_ifs" + test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm +fi +fi + +NM="$lt_cv_path_NM" +echo "$ac_t""$NM" 1>&6 + +echo $ac_n "checking for a sed that does not truncate output""... 
$ac_c" 1>&6 +echo "configure:2191: checking for a sed that does not truncate output" >&5 +if eval "test \"`echo '$''{'lt_cv_path_SED'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + # Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_executable_p="test -f" +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + _sed_list="$_sed_list $as_dir/$ac_prog$ac_exec_ext" + fi + done + done +done + + # Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. +: ${TMPDIR=/tmp} +{ + tmp=`(umask 077 && mktemp -d -q "$TMPDIR/sedXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=$TMPDIR/sed$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in $TMPDIR" >&2 + { (exit 1); exit 1; } +} + _max=0 + _count=0 + # Add /usr/xpg4/bin/sed as it is typically found on Solaris + # along with /bin/sed that truncates output. + for _sed in $_sed_list /usr/xpg4/bin/sed; do + test ! -f ${_sed} && break + cat /dev/null > "$tmp/sed.in" + _count=0 + echo ${ECHO_N-$ac_n} "0123456789${ECHO_C-$ac_c}" >"$tmp/sed.in" + # Check for GNU sed and select it if it is found. 
+ if "${_sed}" --version 2>&1 < /dev/null | egrep '(GNU)' > /dev/null; then + lt_cv_path_SED=${_sed} + break + fi + while true; do + cat "$tmp/sed.in" "$tmp/sed.in" >"$tmp/sed.tmp" + mv "$tmp/sed.tmp" "$tmp/sed.in" + cp "$tmp/sed.in" "$tmp/sed.nl" + echo >>"$tmp/sed.nl" + ${_sed} -e 's/a$//' < "$tmp/sed.nl" >"$tmp/sed.out" || break + cmp -s "$tmp/sed.out" "$tmp/sed.nl" || break + # 40000 chars as input seems more than enough + test $_count -gt 10 && break + _count=`expr $_count + 1` + if test $_count -gt $_max; then + _max=$_count + lt_cv_path_SED=$_sed + fi + done + done + rm -rf "$tmp" + +fi + +if test "X$SED" != "X"; then + lt_cv_path_SED=$SED +else + SED=$lt_cv_path_SED +fi +echo "$ac_t""$SED" 1>&6 + +echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6 +echo "configure:2275: checking whether ln -s works" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + rm -f conftestdata +if ln -s X conftestdata 2>/dev/null +then + rm -f conftestdata + ac_cv_prog_LN_S="ln -s" +else + ac_cv_prog_LN_S=ln +fi +fi +LN_S="$ac_cv_prog_LN_S" +if test "$ac_cv_prog_LN_S" = "ln -s"; then + echo "$ac_t""yes" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +echo $ac_n "checking how to recognise dependent libraries""... $ac_c" 1>&6 +echo "configure:2296: checking how to recognise dependent libraries" >&5 +if eval "test \"`echo '$''{'lt_cv_deplibs_check_method'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# `unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. 
+# 'file_magic [[regex]]' -- check by looking for files in library path +# which responds to the $file_magic_cmd with a given egrep regex. +# If you have `file' or equivalent on your system and you're not sure +# whether `pass_all' will *always* work, you probably want this one. + +case $host_os in +aix4* | aix5*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi4*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin* | mingw* | pw32*) + lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method='file_magic Mach-O dynamically linked shared library' + lt_cv_file_magic_cmd='/usr/bin/file -L' + case "$host_os" in + rhapsody* | darwin1.[012]) + lt_cv_file_magic_test_file=`echo /System/Library/Frameworks/System.framework/Versions/*/System | head -1` + ;; + *) # Darwin 1.3 on + lt_cv_file_magic_test_file='/usr/lib/libSystem.dylib' + ;; + esac + ;; + +freebsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20*|hpux11*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9].[0-9]) shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + irix5* | nonstopux*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1" + ;; + *) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1" + ;; + esac + lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*` + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be Linux ELF. 
+linux-gnu*) + case $host_cpu in + alpha* | hppa* | i*86 | mips | mipsel | powerpc* | sparc* | ia64* | s390* | x86_64*) + lt_cv_deplibs_check_method=pass_all ;; + *) + # glibc up to 2.1.1 does not perform some relocations on ARM + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;; + esac + lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so` + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so\.[0-9]+\.[0-9]+$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +openbsd*) + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB shared object' + else + lt_cv_deplibs_check_method='file_magic OpenBSD.* shared library' + fi + ;; + +osf3* | osf4* | osf5*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method='file_magic COFF format alpha shared library' + lt_cv_file_magic_test_file=/shlib/libc.so + lt_cv_deplibs_check_method=pass_all + ;; + +sco3.2v5*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + lt_cv_file_magic_test_file=/lib/libc.so + ;; + +sysv5uw[78]* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + 
lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; +esac + +fi + +echo "$ac_t""$lt_cv_deplibs_check_method" 1>&6 +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method + +echo $ac_n "checking for object suffix""... $ac_c" 1>&6 +echo "configure:2482: checking for object suffix" >&5 +if eval "test \"`echo '$''{'ac_cv_objext'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + rm -f conftest* +echo 'int i = 1;' > conftest.$ac_ext +if { (eval echo configure:2488: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + for ac_file in conftest.*; do + case $ac_file in + *.c) ;; + *) ac_cv_objext=`echo $ac_file | sed -e s/conftest.//` ;; + esac + done +else + { echo "configure: error: installation or configuration problem; compiler does not work" 1>&2; exit 1; } +fi +rm -f conftest* +fi + +echo "$ac_t""$ac_cv_objext" 1>&6 +OBJEXT=$ac_cv_objext +ac_objext=$ac_cv_objext + +if test $host != $build; then + ac_tool_prefix=${host_alias}- +else + ac_tool_prefix= +fi + + + + +# Check for command to grab the raw symbol name followed by C symbol from nm. +echo $ac_n "checking command to parse $NM output""... $ac_c" 1>&6 +echo "configure:2516: checking command to parse $NM output" >&5 +if eval "test \"`echo '$''{'lt_cv_sys_global_symbol_pipe'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. 
+symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Transform the above into a raw symbol and a C symbol. +symxfrm='\1 \2\3 \3' + +# Transform an extracted symbol line into a proper C declaration +lt_cv_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32*) + symcode='[ABCDGISTW]' + ;; +hpux*) # Its linker distinguishes data from code symbols + lt_cv_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + lt_cv_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris* | sysv5*) + symcode='[BDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# Handle CRLF in mingw tool chain +opt_cr= +case $host_os in +mingw*) + opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then + symcode='[ABCDGISTW]' +fi + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Write the raw and C identifiers. +lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'" + + # Check to see that the pipe works correctly. + pipe_works=no + rm -f conftest* + cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then + # Now try to grab the symbols. 
+ nlist=conftest.nm + if { (eval echo configure:2602: \"$NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist\") 1>&5; (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) 2>&5; } && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if egrep ' nm_test_var$' "$nlist" >/dev/null; then + if egrep ' nm_test_func$' "$nlist" >/dev/null; then + cat < conftest.$ac_ext +#ifdef __cplusplus +extern "C" { +#endif + +EOF + # Now generate the symbol file. + eval "$lt_cv_global_symbol_to_cdecl"' < "$nlist" >> conftest.$ac_ext' + + cat <> conftest.$ac_ext +#if defined (__STDC__) && __STDC__ +# define lt_ptr void * +#else +# define lt_ptr char * +# define const +#endif + +/* The mapping between symbol names and symbols. */ +const struct { + const char *name; + lt_ptr address; +} +lt_preloaded_symbols[] = +{ +EOF + sed "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr) \&\2},/" < "$nlist" >> conftest.$ac_ext + cat <<\EOF >> conftest.$ac_ext + {0, (lt_ptr) 0} +}; + +#ifdef __cplusplus +} +#endif +EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$no_builtin_flag" + if { (eval echo configure:2653: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -f conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. 
+ if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +global_symbol_pipe="$lt_cv_sys_global_symbol_pipe" +if test -z "$lt_cv_sys_global_symbol_pipe"; then + global_symbol_to_cdecl= + global_symbol_to_c_name_address= +else + global_symbol_to_cdecl="$lt_cv_global_symbol_to_cdecl" + global_symbol_to_c_name_address="$lt_cv_global_symbol_to_c_name_address" +fi +if test -z "$global_symbol_pipe$global_symbol_to_cdecl$global_symbol_to_c_name_address"; +then + echo "$ac_t""failed" 1>&6 +else + echo "$ac_t""ok" 1>&6 +fi + +for ac_hdr in dlfcn.h +do +ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` +echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 +echo "configure:2702: checking for $ac_hdr" >&5 +if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:2712: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + eval "ac_cv_header_$ac_safe=yes" +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_header_$ac_safe=no" +fi +rm -f conftest* +fi +if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` + cat >> confdefs.h <&6 +fi +done + + + + + + +# Only perform the check for file, if the check method requires it +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + echo $ac_n "checking for ${ac_tool_prefix}file""... 
$ac_c" 1>&6 +echo "configure:2748: checking for ${ac_tool_prefix}file" >&5 +if eval "test \"`echo '$''{'lt_cv_path_MAGIC_CMD'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case $MAGIC_CMD in + /*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; + ?:/*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a dos path. + ;; + *) + ac_save_MAGIC_CMD="$MAGIC_CMD" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="/usr/bin:$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/${ac_tool_prefix}file; then + lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`" + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + egrep "$file_magic_regex" > /dev/null; then + : + else + cat <&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +EOF + fi ;; + esac + fi + break + fi + done + IFS="$ac_save_ifs" + MAGIC_CMD="$ac_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + echo "$ac_t""$MAGIC_CMD" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + echo $ac_n "checking for file""... 
$ac_c" 1>&6 +echo "configure:2810: checking for file" >&5 +if eval "test \"`echo '$''{'lt_cv_path_MAGIC_CMD'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case $MAGIC_CMD in + /*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. + ;; + ?:/*) + lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a dos path. + ;; + *) + ac_save_MAGIC_CMD="$MAGIC_CMD" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="/usr/bin:$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/file; then + lt_cv_path_MAGIC_CMD="$ac_dir/file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`" + MAGIC_CMD="$lt_cv_path_MAGIC_CMD" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + egrep "$file_magic_regex" > /dev/null; then + : + else + cat <&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +EOF + fi ;; + esac + fi + break + fi + done + IFS="$ac_save_ifs" + MAGIC_CMD="$ac_save_MAGIC_CMD" + ;; +esac +fi + +MAGIC_CMD="$lt_cv_path_MAGIC_CMD" +if test -n "$MAGIC_CMD"; then + echo "$ac_t""$MAGIC_CMD" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 +echo "configure:2881: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +RANLIB="$ac_cv_prog_RANLIB" +if test -n "$RANLIB"; then + echo "$ac_t""$RANLIB" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +if test -z "$ac_cv_prog_RANLIB"; then +if test -n "$ac_tool_prefix"; then + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:2913: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_RANLIB="ranlib" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" +fi +fi +RANLIB="$ac_cv_prog_RANLIB" +if test -n "$RANLIB"; then + echo "$ac_t""$RANLIB" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +else + RANLIB=":" +fi +fi + +# Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 +echo "configure:2948: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_STRIP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +STRIP="$ac_cv_prog_STRIP" +if test -n "$STRIP"; then + echo "$ac_t""$STRIP" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +if test -z "$ac_cv_prog_STRIP"; then +if test -n "$ac_tool_prefix"; then + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:2980: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_STRIP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_STRIP="strip" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_STRIP" && ac_cv_prog_STRIP=":" +fi +fi +STRIP="$ac_cv_prog_STRIP" +if test -n "$STRIP"; then + echo "$ac_t""$STRIP" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +else + STRIP=":" +fi +fi + + +enable_dlopen=yes +enable_win32_dll=yes + +# Check whether --enable-libtool-lock or --disable-libtool-lock was given. +if test "${enable_libtool_lock+set}" = set; then + enableval="$enable_libtool_lock" + : +fi + +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. 
+case $host in +*-*-irix6*) + # Find out which ABI we are using. + echo '#line 3029 "configure"' > conftest.$ac_ext + if { (eval echo configure:3030: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + echo $ac_n "checking whether the C compiler needs -belf""... $ac_c" 1>&6 +echo "configure:3051: checking whether the C compiler needs -belf" >&5 +if eval "test \"`echo '$''{'lt_cv_cc_needs_belf'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + + cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + lt_cv_cc_needs_belf=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + lt_cv_cc_needs_belf=no +fi +rm -f conftest* + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
+ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + +fi + +echo "$ac_t""$lt_cv_cc_needs_belf" 1>&6 + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; + +*-*-cygwin* | *-*-mingw* | *-*-pw32*) + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:3101: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_DLLTOOL'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +DLLTOOL="$ac_cv_prog_DLLTOOL" +if test -n "$DLLTOOL"; then + echo "$ac_t""$DLLTOOL" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +if test -z "$ac_cv_prog_DLLTOOL"; then +if test -n "$ac_tool_prefix"; then + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:3133: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_DLLTOOL'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + ac_cv_prog_DLLTOOL="dlltool" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_DLLTOOL" && ac_cv_prog_DLLTOOL="false" +fi +fi +DLLTOOL="$ac_cv_prog_DLLTOOL" +if test -n "$DLLTOOL"; then + echo "$ac_t""$DLLTOOL" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +else + DLLTOOL="false" +fi +fi + + # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args. +set dummy ${ac_tool_prefix}as; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:3168: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_AS'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$AS"; then + ac_cv_prog_AS="$AS" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_AS="${ac_tool_prefix}as" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +AS="$ac_cv_prog_AS" +if test -n "$AS"; then + echo "$ac_t""$AS" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +if test -z "$ac_cv_prog_AS"; then +if test -n "$ac_tool_prefix"; then + # Extract the first word of "as", so it can be a program name with args. +set dummy as; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:3200: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_AS'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$AS"; then + ac_cv_prog_AS="$AS" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + ac_cv_prog_AS="as" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_AS" && ac_cv_prog_AS="false" +fi +fi +AS="$ac_cv_prog_AS" +if test -n "$AS"; then + echo "$ac_t""$AS" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +else + AS="false" +fi +fi + + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:3235: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_OBJDUMP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +OBJDUMP="$ac_cv_prog_OBJDUMP" +if test -n "$OBJDUMP"; then + echo "$ac_t""$OBJDUMP" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +if test -z "$ac_cv_prog_OBJDUMP"; then +if test -n "$ac_tool_prefix"; then + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:3267: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_OBJDUMP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + ac_cv_prog_OBJDUMP="objdump" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_OBJDUMP" && ac_cv_prog_OBJDUMP="false" +fi +fi +OBJDUMP="$ac_cv_prog_OBJDUMP" +if test -n "$OBJDUMP"; then + echo "$ac_t""$OBJDUMP" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +else + OBJDUMP="false" +fi +fi + + + # recent cygwin and mingw systems supply a stub DllMain which the user + # can override, but on older systems we have to supply one + echo $ac_n "checking if libtool should supply DllMain function""... $ac_c" 1>&6 +echo "configure:3303: checking if libtool should supply DllMain function" >&5 +if eval "test \"`echo '$''{'lt_cv_need_dllmain'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + lt_cv_need_dllmain=no +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + lt_cv_need_dllmain=yes +fi +rm -f conftest* +fi + +echo "$ac_t""$lt_cv_need_dllmain" 1>&6 + + case $host/$CC in + *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*) + # old mingw systems require "-dll" to link a DLL, while more recent ones + # require "-mdll" + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -mdll" + echo $ac_n "checking how to link DLLs""... 
$ac_c" 1>&6 +echo "configure:3337: checking how to link DLLs" >&5 +if eval "test \"`echo '$''{'lt_cv_cc_dll_switch'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + lt_cv_cc_dll_switch=-mdll +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + lt_cv_cc_dll_switch=-dll +fi +rm -f conftest* +fi + +echo "$ac_t""$lt_cv_cc_dll_switch" 1>&6 + CFLAGS="$SAVE_CFLAGS" ;; + *-*-cygwin* | *-*-pw32*) + # cygwin systems need to pass --dll to the linker, and not link + # crt.o which will require a WinMain@16 definition. + lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;; + esac + ;; + +esac + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='sed -e s/^X//' +sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Constants: +rm="rm -f" + +# Global variables: +default_ofile=libtool +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except M$VC, +# which needs '.lib'). 
+libext=a +ltmain="$ac_aux_dir/ltmain.sh" +ofile="$default_ofile" +with_gnu_ld="$lt_cv_prog_gnu_ld" +need_locks="$enable_libtool_lock" + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$AR" && AR=ar +test -z "$AR_FLAGS" && AR_FLAGS=cru +test -z "$AS" && AS=as +test -z "$CC" && CC=cc +test -z "$DLLTOOL" && DLLTOOL=dlltool +test -z "$LD" && LD=ld +test -z "$LN_S" && LN_S="ln -s" +test -z "$MAGIC_CMD" && MAGIC_CMD=file +test -z "$NM" && NM=nm +test -z "$OBJDUMP" && OBJDUMP=objdump +test -z "$RANLIB" && RANLIB=: +test -z "$STRIP" && STRIP=: +test -z "$ac_objext" && ac_objext=o + +if test x"$host" != x"$build"; then + ac_tool_prefix=${host_alias}- +else + ac_tool_prefix= +fi + +# Transform linux* to *-*-linux-gnu*, to support old configure scripts. +case $host_os in +linux-gnu*) ;; +linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'` +esac + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="\$RANLIB -t \$oldlib~$old_postinstall_cmds" + ;; + *) + old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" +fi + +# Allow CC to be a program name with arguments. +set dummy $CC +compiler="$2" + +echo $ac_n "checking for objdir""... 
$ac_c" 1>&6 +echo "configure:3463: checking for objdir" >&5 +rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + objdir=_libs +fi +rmdir .libs 2>/dev/null +echo "$ac_t""$objdir" 1>&6 + + +# Check whether --with-pic or --without-pic was given. +if test "${with_pic+set}" = set; then + withval="$with_pic" + pic_mode="$withval" +else + pic_mode=default +fi + +test -z "$pic_mode" && pic_mode=default + +# We assume here that the value for lt_cv_prog_cc_pic will not be cached +# in isolation, and that seeing it set (from the cache) indicates that +# the associated values are set (in the cache) correctly too. +echo $ac_n "checking for $compiler option to produce PIC""... $ac_c" 1>&6 +echo "configure:3490: checking for $compiler option to produce PIC" >&5 +if eval "test \"`echo '$''{'lt_cv_prog_cc_pic'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + lt_cv_prog_cc_pic= + lt_cv_prog_cc_shlib= + lt_cv_prog_cc_wl= + lt_cv_prog_cc_static= + lt_cv_prog_cc_no_builtin= + lt_cv_prog_cc_can_build_shared=$can_build_shared + + if test "$GCC" = yes; then + lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static='-static' + + case $host_os in + aix*) + # Below there is a dirty hack to force normal static linking with -ldl. + # The problem is that libdl is dynamically linked with both libc and + # libC (the AIX C++ library), which gcc obviously does not include in its + # library list. This causes undefined symbols with the -static flag. + # This hack allows C programs to be linked with "-static -ldl", but + # we are not sure about C++ programs. + lt_cv_prog_cc_static="$lt_cv_prog_cc_static ${lt_cv_prog_cc_wl}-lC" + ;; + amigaos*) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'.
+ lt_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4' + ;; + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_cv_prog_cc_pic='-fno-common' + ;; + cygwin* | mingw* | pw32* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_cv_prog_cc_pic='-DDLL_EXPORT' + ;; + sysv4*MP*) + if test -d /usr/nec; then + lt_cv_prog_cc_pic=-Kconform_pic + fi + ;; + *) + lt_cv_prog_cc_pic='-fPIC' + ;; + esac + else + # PORTME Check for PIC flags for the system compiler. + case $host_os in + aix3* | aix4* | aix5*) + lt_cv_prog_cc_wl='-Wl,' + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + lt_cv_prog_cc_static='-Bstatic' + else + lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + hpux9* | hpux10* | hpux11*) + # Is there a better lt_cv_prog_cc_static that works with the bundled CC? + lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static="${lt_cv_prog_cc_wl}-a ${lt_cv_prog_cc_wl}archive" + lt_cv_prog_cc_pic='+Z' + ;; + + irix5* | irix6* | nonstopux*) + lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static='-non_shared' + # PIC (with -KPIC) is the default. + ;; + + cygwin* | mingw* | pw32* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_cv_prog_cc_pic='-DDLL_EXPORT' + ;; + + newsos6) + lt_cv_prog_cc_pic='-KPIC' + lt_cv_prog_cc_static='-Bstatic' + ;; + + osf3* | osf4* | osf5*) + # All OSF/1 code is PIC. 
+ lt_cv_prog_cc_wl='-Wl,' + lt_cv_prog_cc_static='-non_shared' + ;; + + sco3.2v5*) + lt_cv_prog_cc_pic='-Kpic' + lt_cv_prog_cc_static='-dn' + lt_cv_prog_cc_shlib='-belf' + ;; + + solaris*) + lt_cv_prog_cc_pic='-KPIC' + lt_cv_prog_cc_static='-Bstatic' + lt_cv_prog_cc_wl='-Wl,' + ;; + + sunos4*) + lt_cv_prog_cc_pic='-PIC' + lt_cv_prog_cc_static='-Bstatic' + lt_cv_prog_cc_wl='-Qoption ld ' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + lt_cv_prog_cc_pic='-KPIC' + lt_cv_prog_cc_static='-Bstatic' + lt_cv_prog_cc_wl='-Wl,' + ;; + + uts4*) + lt_cv_prog_cc_pic='-pic' + lt_cv_prog_cc_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + lt_cv_prog_cc_pic='-Kconform_pic' + lt_cv_prog_cc_static='-Bstatic' + fi + ;; + + *) + lt_cv_prog_cc_can_build_shared=no + ;; + esac + fi + +fi + +if test -z "$lt_cv_prog_cc_pic"; then + echo "$ac_t""none" 1>&6 +else + echo "$ac_t""$lt_cv_prog_cc_pic" 1>&6 + + # Check to make sure the pic_flag actually works. + echo $ac_n "checking if $compiler PIC flag $lt_cv_prog_cc_pic works""... $ac_c" 1>&6 +echo "configure:3638: checking if $compiler PIC flag $lt_cv_prog_cc_pic works" >&5 + if eval "test \"`echo '$''{'lt_cv_prog_cc_pic_works'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $lt_cv_prog_cc_pic -DPIC" + cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + case $host_os in + hpux9* | hpux10* | hpux11*) + # On HP-UX, both CC and GCC only warn that PIC is supported... then + # they create non-PIC objects. So, if there were any warnings, we + # assume that PIC is not supported. 
+ if test -s conftest.err; then + lt_cv_prog_cc_pic_works=no + else + lt_cv_prog_cc_pic_works=yes + fi + ;; + *) + lt_cv_prog_cc_pic_works=yes + ;; + esac + +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + lt_cv_prog_cc_pic_works=no + +fi +rm -f conftest* + CFLAGS="$save_CFLAGS" + +fi + + + if test "X$lt_cv_prog_cc_pic_works" = Xno; then + lt_cv_prog_cc_pic= + lt_cv_prog_cc_can_build_shared=no + else + lt_cv_prog_cc_pic=" $lt_cv_prog_cc_pic" + fi + + echo "$ac_t""$lt_cv_prog_cc_pic_works" 1>&6 +fi + +# Check for any special shared library compilation flags. +if test -n "$lt_cv_prog_cc_shlib"; then + echo "configure: warning: \`$CC' requires \`$lt_cv_prog_cc_shlib' to build shared libraries" 1>&2 + if echo "$old_CC $old_CFLAGS " | egrep -e "[ ]$lt_cv_prog_cc_shlib[ ]" >/dev/null; then : + else + echo "configure: warning: add \`$lt_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure" 1>&2 + lt_cv_prog_cc_can_build_shared=no + fi +fi + +echo $ac_n "checking if $compiler static flag $lt_cv_prog_cc_static works""... 
$ac_c" 1>&6 +echo "configure:3704: checking if $compiler static flag $lt_cv_prog_cc_static works" >&5 +if eval "test \"`echo '$''{'lt_cv_prog_cc_static_works'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + lt_cv_prog_cc_static_works=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $lt_cv_prog_cc_static" + cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + lt_cv_prog_cc_static_works=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 +fi +rm -f conftest* + LDFLAGS="$save_LDFLAGS" + +fi + + +# Belt *and* braces to stop my trousers falling down: +test "X$lt_cv_prog_cc_static_works" = Xno && lt_cv_prog_cc_static= +echo "$ac_t""$lt_cv_prog_cc_static_works" 1>&6 + +pic_flag="$lt_cv_prog_cc_pic" +special_shlib_compile_flags="$lt_cv_prog_cc_shlib" +wl="$lt_cv_prog_cc_wl" +link_static_flag="$lt_cv_prog_cc_static" +no_builtin_flag="$lt_cv_prog_cc_no_builtin" +can_build_shared="$lt_cv_prog_cc_can_build_shared" + + +# Check to see if options -o and -c are simultaneously supported by compiler +echo $ac_n "checking if $compiler supports -c -o file.$ac_objext""... $ac_c" 1>&6 +echo "configure:3746: checking if $compiler supports -c -o file.$ac_objext" >&5 +if eval "test \"`echo '$''{'lt_cv_compiler_c_o'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + +$rm -r conftest 2>/dev/null +mkdir conftest +cd conftest +echo "int some_variable = 0;" > conftest.$ac_ext +mkdir out +# According to Tom Tromey, Ian Lance Taylor reported there are C compilers +# that will create temporary files in the current directory regardless of +# the output directory. Thus, making CWD read-only will cause this test +# to fail, enabling locking or at least warning the user not to do parallel +# builds. +chmod -w . 
+save_CFLAGS="$CFLAGS" +CFLAGS="$CFLAGS -o out/conftest2.$ac_objext" +compiler_c_o=no +if { (eval echo configure:3765: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.$ac_objext; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s out/conftest.err; then + lt_cv_compiler_c_o=no + else + lt_cv_compiler_c_o=yes + fi +else + # Append any errors to the config.log. + cat out/conftest.err 1>&5 + lt_cv_compiler_c_o=no +fi +CFLAGS="$save_CFLAGS" +chmod u+w . +$rm conftest* out/* +rmdir out +cd .. +rmdir conftest +$rm -r conftest 2>/dev/null + +fi + +compiler_c_o=$lt_cv_compiler_c_o +echo "$ac_t""$compiler_c_o" 1>&6 + +if test x"$compiler_c_o" = x"yes"; then + # Check to see if we can write to a .lo + echo $ac_n "checking if $compiler supports -c -o file.lo""... $ac_c" 1>&6 +echo "configure:3794: checking if $compiler supports -c -o file.lo" >&5 + if eval "test \"`echo '$''{'lt_cv_compiler_o_lo'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + + lt_cv_compiler_o_lo=no + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -c -o conftest.lo" + save_objext="$ac_objext" + ac_objext=lo + cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + lt_cv_compiler_o_lo=no + else + lt_cv_compiler_o_lo=yes + fi + +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 +fi +rm -f conftest* + ac_objext="$save_objext" + CFLAGS="$save_CFLAGS" + +fi + + compiler_o_lo=$lt_cv_compiler_o_lo + echo "$ac_t""$compiler_o_lo" 1>&6 +else + compiler_o_lo=no +fi + +# Check to see if we can do hard links to lock some files if needed +hard_links="nottested" +if test "$compiler_c_o" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + echo $ac_n "checking 
if we can lock with hard links""... $ac_c" 1>&6 +echo "configure:3843: checking if we can lock with hard links" >&5 + hard_links=yes + $rm conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + echo "$ac_t""$hard_links" 1>&6 + if test "$hard_links" = no; then + echo "configure: warning: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" 1>&2 + need_locks=warn + fi +else + need_locks=no +fi + +if test "$GCC" = yes; then + # Check to see if options -fno-rtti -fno-exceptions are supported by compiler + echo $ac_n "checking if $compiler supports -fno-rtti -fno-exceptions""... $ac_c" 1>&6 +echo "configure:3862: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 + echo "int some_variable = 0;" > conftest.$ac_ext + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.$ac_ext" + compiler_rtti_exceptions=no + cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + compiler_rtti_exceptions=no + else + compiler_rtti_exceptions=yes + fi + +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 +fi +rm -f conftest* + CFLAGS="$save_CFLAGS" + echo "$ac_t""$compiler_rtti_exceptions" 1>&6 + + if test "$compiler_rtti_exceptions" = "yes"; then + no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions' + else + no_builtin_flag=' -fno-builtin' + fi +fi + +# See if the linker supports building shared libraries. +echo $ac_n "checking whether the linker ($LD) supports shared libraries""... 
$ac_c" 1>&6 +echo "configure:3902: checking whether the linker ($LD) supports shared libraries" >&5 + +allow_undefined_flag= +no_undefined_flag= +need_lib_prefix=unknown +need_version=unknown +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +archive_cmds= +archive_expsym_cmds= +old_archive_from_new_cmds= +old_archive_from_expsyms_cmds= +export_dynamic_flag_spec= +whole_archive_flag_spec= +thread_safe_flag_spec= +hardcode_into_libs=no +hardcode_libdir_flag_spec= +hardcode_libdir_separator= +hardcode_direct=no +hardcode_minus_L=no +hardcode_shlibpath_var=unsupported +runpath_var= +link_all_deplibs=unknown +always_export_symbols=no +export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols' +# include_expsyms should be a list of space-separated symbols to be *always* +# included in the symbol list +include_expsyms= +# exclude_expsyms can be an egrep regular expression of symbols to exclude +# it will be wrapped by ` (' and `)$', so one must not match beginning or +# end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', +# as well as any symbol that contains `d'. +exclude_expsyms="_GLOBAL_OFFSET_TABLE_" +# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out +# platforms (ab)use it in PIC code, but their linkers get confused if +# the symbol is explicitly referenced. Since portable code cannot +# rely on this symbol name, it's probably fine to never include it in +# preloaded symbol tables. +extract_expsyms_cmds= + +case $host_os in +cygwin* | mingw* | pw32*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. 
+ if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; +openbsd*) + with_gnu_ld=no + ;; +esac + +ld_shlibs=yes +if test "$with_gnu_ld" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # See if GNU ld supports shared libraries. + case $host_os in + aix3* | aix4* | aix5*) + # On AIX, the GNU linker is very broken + # Note: Check GNU linker on AIX 5-IA64 when/if it becomes available. + ld_shlibs=no + cat <&2 + +*** Warning: the GNU linker, at least up to release 2.9.1, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to modify your PATH +*** so that a non-GNU linker is found, and then restart. + +EOF + ;; + + amigaos*) + archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + + # Samuel A. Falvo II reports + # that the semantics of dynamic libraries on AmigaOS, at least up + # to version 4, is to share data among multiple programs linked + # with the same dynamic library. Since this doesn't match the + # behavior of shared libraries on other platforms, we cannot use + # them. + ld_shlibs=no + ;; + + beos*) + if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation.
FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32*) + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec='-L$libdir' + allow_undefined_flag=unsupported + always_export_symbols=yes + + extract_expsyms_cmds='test -f $output_objdir/impgen.c || \ + sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //;s/^# *$//; p; }" -e d < $''0 > $output_objdir/impgen.c~ + test -f $output_objdir/impgen.exe || (cd $output_objdir && \ + if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \ + else $CC -o impgen impgen.c ; fi)~ + $output_objdir/impgen $dir/$soroot > $output_objdir/$soname-def' + + old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib' + + # cygwin and mingw dlls have different entry points and sets of symbols + # to exclude. + # FIXME: what about values for MSVC? + dll_entry=__cygwin_dll_entry@12 + dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~ + case $host_os in + mingw*) + # mingw values + dll_entry=_DllMainCRTStartup@12 + dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~ + ;; + esac + + # mingw and cygwin differ, and it's simplest to just exclude the union + # of the two symbol sets. 
+ dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12 + + # recent cygwin and mingw systems supply a stub DllMain which the user + # can override, but on older systems we have to supply one (in ltdll.c) + if test "x$lt_cv_need_dllmain" = "xyes"; then + ltdll_obj='$output_objdir/$soname-ltdll.'"$ac_objext " + ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $''0 > $output_objdir/$soname-ltdll.c~ + test -f $output_objdir/$soname-ltdll.$ac_objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~' + else + ltdll_obj= + ltdll_cmds= + fi + + # Extract the symbol export list from an `--export-all' def file, + # then regenerate the def file from the symbol export list, so that + # the compiled dll only exports the symbol export list. + # Be careful not to strip the DATA tag left be newer dlltools. + export_symbols_cmds="$ltdll_cmds"' + $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~ + sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols' + + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is. + # If DATA tags from a recent dlltool are present, honour them! 
+ archive_expsym_cmds='if test "x`sed 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname-def; + else + echo EXPORTS > $output_objdir/$soname-def; + _lt_hint=1; + cat $export_symbols | while read symbol; do + set dummy \$symbol; + case \$# in + 2) echo " \$2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;; + 4) echo " \$2 \$3 \$4 ; " >> $output_objdir/$soname-def; _lt_hint=`expr \$_lt_hint - 1`;; + *) echo " \$2 @ \$_lt_hint \$3 ; " >> $output_objdir/$soname-def;; + esac; + _lt_hint=`expr 1 + \$_lt_hint`; + done; + fi~ + '"$ltdll_cmds"' + $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~ + $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~ + $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~ + $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp --output-lib $output_objdir/$libname.dll.a~ + $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags' + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris* | sysv5*) + if $LD -v 2>&1 
| egrep 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +EOF + elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test "$ld_shlibs" = yes; then + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir' + export_dynamic_flag_spec='${wl}--export-dynamic' + case $host_os in + cygwin* | mingw* | pw32*) + # dlltool doesn't understand --whole-archive et. al. + whole_archive_flag_spec= + ;; + *) + # ancient GNU ld didn't support --whole-archive et. al. 
+ if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec= + fi + ;; + esac + fi +else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. + hardcode_minus_L=yes + if test "$GCC" = yes && test -z "$link_static_flag"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix4* | aix5*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[23]|aix4.[23].*|aix5*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
+ + hardcode_direct=yes + archive_cmds='' + hardcode_libdir_separator=':' + if test "$GCC" = yes; then + case $host_os in aix4.[012]|aix4.[012].*) + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && \ + strings "$collect2name" | grep resolve_lib_name >/dev/null + then + # We have reworked collect2 + hardcode_direct=yes + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + esac + + shared_flag='-shared' + else + # not using gcc + if test "$host_cpu" = ia64; then + shared_flag='${wl}-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + # It seems that -bexpall can do strange things, so it is better to + # generate a list of symbols to export. + always_export_symbols=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag='-berok' + hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:/usr/lib:/lib' + archive_expsym_cmds="\$CC"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols" + else + hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib' + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + allow_undefined_flag='${wl}-berok' + # This is a bit strange, but is similar to how AIX traditionally builds + # it's shared libraries. 
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname' + fi + fi + ;; + + amigaos*) + archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + # see comment about different semantics on the GNU ld section + ld_shlibs=no + ;; + + cygwin* | mingw* | pw32*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. + old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs' + fix_srcfile_path='`cygpath -w "$srcfile"`' + ;; + + darwin* | rhapsody*) + case "$host_os" in + rhapsody* | darwin1.[012]) + allow_undefined_flag='-undefined suppress' + ;; + *) # Darwin 1.3 on + allow_undefined_flag='-flat_namespace -undefined suppress' + ;; + esac + # FIXME: Relying on posixy $() will cause problems for + # cross-compilation, but unfortunately the echo tests do not + # yet detect zsh echo's removal of \ escapes. 
Also zsh mangles + # `"' quotes if we put them in here... so don't! + archive_cmds='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs && $CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib ${lib}-master.o $deplibs$linker_flags $(test .$module != .yes && echo -install_name $rpath/$soname $verstring)' + # We need to add '_' to the symbols in $export_symbols first + #archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols' + hardcode_direct=yes + hardcode_shlibpath_var=no + whole_archive_flag_spec='-all_load $convenience' + ;; + + freebsd1*) + ld_shlibs=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd*) + archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9* | hpux10* | hpux11*) + case $host_os in + hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;; + *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;; + esac + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_minus_L=yes # Not in the search PATH, but as the default + # location of the library. + export_dynamic_flag_spec='${wl}-E' + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + else + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='-rpath $libdir' + fi + hardcode_libdir_separator=: + link_all_deplibs=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + 
openbsd*) + hardcode_direct=yes + hardcode_shlibpath_var=no + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + export_dynamic_flag_spec='${wl}-E' + else + case "$host_os" in + openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-R$libdir' + ;; + *) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='${wl}-rpath,$libdir' + ;; + esac + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + fi + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' 
+ hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "-exported_symbol " >> $lib.exp; echo "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~ + $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${objdir}/so_locations -o $lib~$rm $lib.exp' + + #Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + hardcode_libdir_separator=: + ;; + + sco3.2v5*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + export_dynamic_flag_spec='${wl}-Bexport' + ;; + + solaris*) + # gcc --version < 3.0 without binutils cannot create self contained + # shared libraries reliably, requiring libgcc.a to resolve some of + # the object symbols generated in some cases. Libraries that use + # assert need libgcc.a to resolve __eprintf, for example. 
Linking + # a copy of libgcc.a into every shared library to guarantee resolving + # such symbols causes other problems: According to Tim Van Holder + # , C++ libraries end up with a separate + # (to the application) exception stack for one thing. + no_undefined_flag=' -z defs' + if test "$GCC" = yes; then + case `$CC --version 2>/dev/null` in + [12].*) + cat <&2 + +*** Warning: Releases of GCC earlier than version 3.0 cannot reliably +*** create self contained shared libraries on Solaris systems, without +*** introducing a dependency on libgcc.a. Therefore, libtool is disabling +*** -no-undefined support, which will at least allow you to build shared +*** libraries. However, you may find that when you link such libraries +*** into an application without using GCC, you have to manually add +*** \`gcc --print-libgcc-file-name\` to the link command. We urge you to +*** upgrade to a newer version of GCC. Another option is to rebuild your +*** current GCC to use the GNU linker from GNU binutils 2.9.1 or newer. + +EOF + no_undefined_flag= + ;; + esac + fi + # $CC -shared without GNU ld will not create a library from C++ + # object files and a static libstdc++, better avoid it by now + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test "x$host_vendor" = xsequent; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv5*) + no_undefined_flag=' -z text' + # $CC -shared without GNU ld will not create a library from C++ + # object files and a static libstdc++, better avoid it by now + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' + hardcode_libdir_flag_spec= + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + ;; + + uts4*) + 
archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4.2uw2*) + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=no + hardcode_shlibpath_var=no + hardcode_runpath_var=yes + runpath_var=LD_RUN_PATH + ;; + + sysv5uw7* | unixware7*) + no_undefined_flag='${wl}-z ${wl}text' + if test "$GCC" = yes; then + archive_cmds='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac +fi +echo "$ac_t""$ld_shlibs" 1>&6 +test "$ld_shlibs" = no && can_build_shared=no + +# Check hardcoding attributes. +echo $ac_n "checking how to hardcode library paths into programs""... $ac_c" 1>&6 +echo "configure:4601: checking how to hardcode library paths into programs" >&5 +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || \ + test -n "$runpath_var"; then + + # We can hardcode non-existant directories. + if test "$hardcode_direct" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$hardcode_shlibpath_var" != no && + test "$hardcode_minus_L" != no; then + # Linking always hardcodes the temporary library directory. 
+ hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action=unsupported +fi +echo "$ac_t""$hardcode_action" 1>&6 + +striplib= +old_striplib= +echo $ac_n "checking whether stripping libraries is possible""... $ac_c" 1>&6 +echo "configure:4629: checking whether stripping libraries is possible" >&5 +if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + echo "$ac_t""yes" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +reload_cmds='$LD$reload_flag -o $output$reload_objs' +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +# PORTME Fill in your ld.so characteristics +echo $ac_n "checking dynamic linker characteristics""... $ac_c" 1>&6 +echo "configure:4643: checking dynamic linker characteristics" >&5 +library_names_spec= +libname_spec='lib$name' +soname_spec= +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + +case $host_os in +aix3*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX has no versioning support, so we append a major version to the name. 
+ soname_spec='${libname}${release}.so$major' + ;; + +aix4* | aix5*) + version_type=linux + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}.so$major ${libname}${release}.so$versuffix $libname.so' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can + # not hardcode correct soname into executable. Probably we can + # add versioning support to collect2, so additional links can + # be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}.so$major' + fi + shlibpath_var=LIBPATH + fi + hardcode_into_libs=yes + ;; + +amigaos*) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' + ;; + +beos*) + library_names_spec='${libname}.so' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi4*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + export_dynamic_flag_spec=-rdynamic + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32*) + version_type=windows + need_version=no + need_lib_prefix=no + case $GCC,$host_os in + yes,cygwin*) + library_names_spec='$libname.dll.a' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll' + postinstall_cmds='dlpath=`bash 2>&1 -c '\''. $dir/${file}i;echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog .libs/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`bash 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $rm \$dlpath' + ;; + yes,mingw*) + library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll' + sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | sed -e "s/^libraries://" -e "s/;/ /g" -e "s,=/,/,g"` + ;; + yes,pw32*) + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | sed -e 's/./-/g'`${versuffix}.dll' + ;; + *) + library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll $libname.lib' + ;; + esac + dynamic_linker='Win32 ld.exe' + # FIXME: first we should search . and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + # FIXME: Relying on posixy $() will cause problems for + # cross-compilation, but unfortunately the echo tests do not + # yet detect zsh echo's removal of \ escapes. + library_names_spec='${libname}${release}${versuffix}.$(test .$module = .yes && echo so || echo dylib) ${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib) ${libname}.$(test .$module = .yes && echo so || echo dylib)' + soname_spec='${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib)' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + ;; + +freebsd1*) + dynamic_linker=no + ;; + +freebsd*) + objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout` + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2*) + shlibpath_overrides_runpath=yes + ;; + *) + shlibpath_overrides_runpath=no + 
hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + dynamic_linker="$host_os dld.sl" + version_type=sunos + need_lib_prefix=no + need_version=no + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl' + soname_spec='${libname}${release}.sl$major' + # HP-UX runs *really* slowly unless shared libraries are mode 555. + postinstall_cmds='chmod 555 $lib' + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) version_type=irix ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}.so$major' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so $libname.so' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + ;; + +# No shared lib support for Linux oldld, aout, or coff. 
+linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. +linux-gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + + # Find out which ABI we are using (multilib Linux x86_64 hack). 
+ libsuff= + case "$host_cpu" in + x86_64*|s390x*) + echo '#line 4902 "configure"' > conftest.$ac_ext + if { (eval echo configure:4903: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + case `/usr/bin/file conftest.$ac_objext` in + *64-bit*) + libsuff=64 + ;; + esac + fi + rm -rf conftest* + ;; + *) + ;; + esac + sys_lib_dlsearch_path_spec="/lib${libsuff} /usr/lib${libsuff}" + sys_lib_search_path_spec="/lib${libsuff} /usr/lib${libsuff} /usr/local/lib${libsuff}" + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so' + soname_spec='${libname}${release}.so$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +openbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case "$host_os" in + openbsd2.[89] | openbsd2.[89].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + ;; + +os2*) + libname_spec='$name' + need_lib_prefix=no + library_names_spec='$libname.dll $libname.a' + dynamic_linker='OS/2 ld.exe' + 
shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_version=no + soname_spec='${libname}${release}.so$major' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + hardcode_into_libs=yes + ;; + +sco3.2v5*) + version_type=osf + soname_spec='${libname}${release}.so$major' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + shlibpath_var=LD_LIBRARY_PATH + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + export_dynamic_flag_spec='${wl}-Blargedynsym' + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + 
+uts4*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +dgux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so' + soname_spec='$libname.so.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +*) + dynamic_linker=no + ;; +esac +echo "$ac_t""$dynamic_linker" 1>&6 +test "$dynamic_linker" = no && can_build_shared=no + +# Report the final consequences. +echo $ac_n "checking if libtool supports shared libraries""... $ac_c" 1>&6 +echo "configure:5074: checking if libtool supports shared libraries" >&5 +echo "$ac_t""$can_build_shared" 1>&6 + +echo $ac_n "checking whether to build shared libraries""... $ac_c" 1>&6 +echo "configure:5078: checking whether to build shared libraries" >&5 +test "$can_build_shared" = "no" && enable_shared=no + +# On AIX, shared libraries and static libraries use the same namespace, and +# are all built from PIC. +case "$host_os" in +aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + +aix4*) + if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then + test "$enable_shared" = yes && enable_static=no + fi + ;; +esac +echo "$ac_t""$enable_shared" 1>&6 + +echo $ac_n "checking whether to build static libraries""... $ac_c" 1>&6 +echo "configure:5101: checking whether to build static libraries" >&5 +# Make sure either enable_shared or enable_static is yes. 
+test "$enable_shared" = yes || enable_static=yes +echo "$ac_t""$enable_static" 1>&6 + +if test "$hardcode_action" = relink; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + cygwin* | mingw* | pw32*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + *) + echo $ac_n "checking for shl_load""... $ac_c" 1>&6 +echo "configure:5142: checking for shl_load" >&5 +if eval "test \"`echo '$''{'ac_cv_func_shl_load'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char shl_load(); + +int main() { + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined (__stub_shl_load) || defined (__stub___shl_load) +choke me +#else +shl_load(); +#endif + +; return 0; } +EOF +if { (eval echo configure:5170: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_func_shl_load=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_func_shl_load=no" +fi +rm -f conftest* +fi + +if eval "test \"`echo '$ac_cv_func_'shl_load`\" = yes"; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="shl_load" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for shl_load in -ldld""... $ac_c" 1>&6 +echo "configure:5188: checking for shl_load in -ldld" >&5 +ac_lib_var=`echo dld'_'shl_load | sed 'y%./+-%__p_%'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-ldld $LIBS" +cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=no" +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for dlopen""... $ac_c" 1>&6 +echo "configure:5226: checking for dlopen" >&5 +if eval "test \"`echo '$''{'ac_cv_func_dlopen'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char dlopen(); + +int main() { + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. 
Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined (__stub_dlopen) || defined (__stub___dlopen) +choke me +#else +dlopen(); +#endif + +; return 0; } +EOF +if { (eval echo configure:5254: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_func_dlopen=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_func_dlopen=no" +fi +rm -f conftest* +fi + +if eval "test \"`echo '$ac_cv_func_'dlopen`\" = yes"; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="dlopen" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for dlopen in -ldl""... $ac_c" 1>&6 +echo "configure:5272: checking for dlopen in -ldl" >&5 +ac_lib_var=`echo dl'_'dlopen | sed 'y%./+-%__p_%'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-ldl $LIBS" +cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=no" +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for dlopen in -lsvld""... 
$ac_c" 1>&6 +echo "configure:5310: checking for dlopen in -lsvld" >&5 +ac_lib_var=`echo svld'_'dlopen | sed 'y%./+-%__p_%'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lsvld $LIBS" +cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=no" +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for dld_link in -ldld""... $ac_c" 1>&6 +echo "configure:5348: checking for dld_link in -ldld" >&5 +ac_lib_var=`echo dld'_'dld_link | sed 'y%./+-%__p_%'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-ldld $LIBS" +cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=no" +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld" +else + echo "$ac_t""no" 1>&6 +fi + + +fi + + +fi + + +fi + + +fi + + +fi + + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + 
save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + echo $ac_n "checking whether a program can dlopen itself""... $ac_c" 1>&6 +echo "configure:5423: checking whether a program can dlopen itself" >&5 +if eval "test \"`echo '$''{'lt_cv_dlopen_self'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext < +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +#ifdef __cplusplus +extern "C" void exit (int); +#endif + +void fnord() { int i=42;} +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + /* dlclose (self); */ + } + + exit (status); +} +EOF + if { (eval echo configure:5494: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; # the unknown status, or any unexpected exit code, counts as no + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + +fi + +echo "$ac_t""$lt_cv_dlopen_self" 1>&6 + + if test "x$lt_cv_dlopen_self" = xyes; then + LDFLAGS="$LDFLAGS $link_static_flag" + echo $ac_n "checking whether a statically linked program can dlopen itself""... $ac_c" 1>&6 +echo "configure:5517: checking whether a statically linked program can dlopen itself" >&5 +if eval "test \"`echo '$''{'lt_cv_dlopen_self_static'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<EOF +#line 5527 "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. 
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +#ifdef __cplusplus +extern "C" void exit (int); +#endif + +void fnord() { int i=42;} +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + /* dlclose (self); */ + } + + exit (status); +} +EOF + if { (eval echo configure:5588: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; # the unknown status, or any unexpected exit code, counts as no + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + +fi + +echo "$ac_t""$lt_cv_dlopen_self_static" 1>&6 + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + +if test "$enable_shared" = yes && test "$GCC" = yes; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. 
+ echo $ac_n "checking whether -lc should be explicitly linked in""... $ac_c" 1>&6 +echo "configure:5637: checking whether -lc should be explicitly linked in" >&5 + if eval "test \"`echo '$''{'lt_cv_archive_cmds_need_lc'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + $rm conftest* + echo 'static int dummy;' > conftest.$ac_ext + + if { (eval echo configure:5644: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_cv_prog_cc_wl + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { (eval echo configure:5657: \"$archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1\") 1>&5; (eval $archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) 2>&5; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $rm conftest* +fi + + echo "$ac_t""$lt_cv_archive_cmds_need_lc" 1>&6 + ;; + esac +fi +need_lc=${lt_cv_archive_cmds_need_lc-yes} + +# The second clause should only fire when bootstrapping the +# libtool distribution, otherwise you forgot to ship ltmain.sh +# with your package, and you will get complaints that there are +# no rules to generate ltmain.sh. +if test -f "$ltmain"; then + : +else + # If there is no Makefile yet, we rely on a make rule to execute + # `config.status --recheck' to rerun these tests and create the + # libtool script then. + test -f Makefile && make "$ltmain" +fi + +if test -f "$ltmain"; then + trap "$rm \"${ofile}T\"; exit 1" 1 2 15 + $rm -f "${ofile}T" + + echo creating $ofile + + # Now quote all the things that may contain metacharacters while being + # careful not to overquote the AC_SUBSTed values. We take copies of the + # variables and quote the copies for generation of the libtool script. 
+ for var in echo old_CC old_CFLAGS SED \ + AR AR_FLAGS CC LD LN_S NM SHELL \ + reload_flag reload_cmds wl \ + pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \ + thread_safe_flag_spec whole_archive_flag_spec libname_spec \ + library_names_spec soname_spec \ + RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \ + old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \ + postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \ + old_striplib striplib file_magic_cmd export_symbols_cmds \ + deplibs_check_method allow_undefined_flag no_undefined_flag \ + finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \ + global_symbol_to_c_name_address \ + hardcode_libdir_flag_spec hardcode_libdir_separator \ + sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ + compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do + + case $var in + reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \ + old_postinstall_cmds | old_postuninstall_cmds | \ + export_symbols_cmds | archive_cmds | archive_expsym_cmds | \ + extract_expsyms_cmds | old_archive_from_expsyms_cmds | \ + postinstall_cmds | postuninstall_cmds | \ + finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) + # Double-quote double-evaled strings. + eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" + ;; + *) + eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" + ;; + esac + done + + cat <<__EOF__ > "${ofile}T" +#! $SHELL + +# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +# Copyright (C) 1996-2000 Free Software Foundation, Inc. 
+# Originally by Gordon Matzigkeit , 1996 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# A sed that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="${SED} -e s/^X//" + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi + +# ### BEGIN LIBTOOL CONFIG + +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$need_lc + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# The host system. +host_alias=$host_alias +host=$host + +# An echo program that does not interpret backslashes. 
+echo=$lt_echo + +# The archiver. +AR=$lt_AR +AR_FLAGS=$lt_AR_FLAGS + +# The default C compiler. +CC=$lt_CC + +# Is the compiler the GNU C compiler? +with_gcc=$GCC + +# The linker used to build libraries. +LD=$lt_LD + +# Whether we need hard or soft links. +LN_S=$lt_LN_S + +# A BSD-compatible nm program. +NM=$lt_NM + +# A symbol stripping program +STRIP=$STRIP + +# Used to examine libraries when file_magic_cmd begins "file" +MAGIC_CMD=$MAGIC_CMD + +# Used on cygwin: DLL creation program. +DLLTOOL="$DLLTOOL" + +# Used on cygwin: object dumper. +OBJDUMP="$OBJDUMP" + +# Used on cygwin: assembler. +AS="$AS" + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# How to pass a linker flag through the compiler. +wl=$lt_wl + +# Object file suffix (normally "o"). +objext="$ac_objext" + +# Old archive suffix (normally "a"). +libext="$libext" + +# Executable file suffix (normally ""). +exeext="$exeext" + +# Additional compiler flags for building library objects. +pic_flag=$lt_pic_flag +pic_mode=$pic_mode + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_compiler_c_o + +# Can we write directly to a .lo ? +compiler_o_lo=$lt_compiler_o_lo + +# Must we lock files when doing compilation ? +need_locks=$lt_need_locks + +# Do we need the lib prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_link_static_flag + +# Compiler flag to turn off builtin functions. 
+no_builtin_flag=$lt_no_builtin_flag + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Compiler flag to generate thread-safe objects. +thread_safe_flag_spec=$lt_thread_safe_flag_spec + +# Library versioning type. +version_type=$version_type + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME. +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Commands used to build and install an old-style archive. +RANLIB=$lt_RANLIB +old_archive_cmds=$lt_old_archive_cmds +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build and install a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds +postinstall_cmds=$lt_postinstall_cmds +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands to strip libraries. +old_striplib=$lt_old_striplib +striplib=$lt_striplib + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method == file_magic. +file_magic_cmd=$lt_file_magic_cmd + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that forces no undefined symbols. 
+no_undefined_flag=$lt_no_undefined_flag + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# Same as above, but a single script fragment to be evaled but not shown. +finish_eval=$lt_finish_eval + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_global_symbol_pipe + +# Transform the output of nm in a proper C declaration +global_symbol_to_cdecl=$lt_global_symbol_to_cdecl + +# Transform the output of nm in a C name address pair +global_symbol_to_c_name_address=$lt_global_symbol_to_c_name_address + +# This is the shared library runtime path variable. +runpath_var=$runpath_var + +# This is the shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist. +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single -rpath flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the +# resulting binary. +hardcode_direct=$hardcode_direct + +# Set to yes if using the -LDIR flag during linking hardcodes DIR into the +# resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into +# the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at relink time. 
+variables_saved_for_relink="$variables_saved_for_relink" + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Compile-time system search path for libraries +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Run-time system search path for libraries +sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec + +# Fix the shell variable \$srcfile for the compiler. +fix_srcfile_path="$fix_srcfile_path" + +# Set to yes if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# ### END LIBTOOL CONFIG + +__EOF__ + + case $host_os in + aix3*) + cat <<\EOF >> "${ofile}T" + +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. 
+if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +EOF + ;; + esac + + case $host_os in + cygwin* | mingw* | pw32* | os2*) + cat <<'EOF' >> "${ofile}T" + # This is a source program that is used to create dlls on Windows + # Don't remove nor modify the starting and closing comments +# /* ltdll.c starts here */ +# #define WIN32_LEAN_AND_MEAN +# #include +# #undef WIN32_LEAN_AND_MEAN +# #include +# +# #ifndef __CYGWIN__ +# # ifdef __CYGWIN32__ +# # define __CYGWIN__ __CYGWIN32__ +# # endif +# #endif +# +# #ifdef __cplusplus +# extern "C" { +# #endif +# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved); +# #ifdef __cplusplus +# } +# #endif +# +# #ifdef __CYGWIN__ +# #include +# DECLARE_CYGWIN_DLL( DllMain ); +# #endif +# HINSTANCE __hDllInstance_base; +# +# BOOL APIENTRY +# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved) +# { +# __hDllInstance_base = hInst; +# return TRUE; +# } +# /* ltdll.c ends here */ + # This is a source program that is used to create import libraries + # on Windows for dlls which lack them. Don't remove nor modify the + # starting and closing comments +# /* impgen.c starts here */ +# /* Copyright (C) 1999-2000 Free Software Foundation, Inc. +# +# This file is part of GNU libtool. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# */ +# +# #include /* for printf() */ +# #include /* for open(), lseek(), read() */ +# #include /* for O_RDONLY, O_BINARY */ +# #include /* for strdup() */ +# +# /* O_BINARY isn't required (or even defined sometimes) under Unix */ +# #ifndef O_BINARY +# #define O_BINARY 0 +# #endif +# +# static unsigned int +# pe_get16 (fd, offset) +# int fd; +# int offset; +# { +# unsigned char b[2]; +# lseek (fd, offset, SEEK_SET); +# read (fd, b, 2); +# return b[0] + (b[1]<<8); +# } +# +# static unsigned int +# pe_get32 (fd, offset) +# int fd; +# int offset; +# { +# unsigned char b[4]; +# lseek (fd, offset, SEEK_SET); +# read (fd, b, 4); +# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); +# } +# +# static unsigned int +# pe_as32 (ptr) +# void *ptr; +# { +# unsigned char *b = ptr; +# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); +# } +# +# int +# main (argc, argv) +# int argc; +# char *argv[]; +# { +# int dll; +# unsigned long pe_header_offset, opthdr_ofs, num_entries, i; +# unsigned long export_rva, export_size, nsections, secptr, expptr; +# unsigned long name_rvas, nexp; +# unsigned char *expdata, *erva; +# char *filename, *dll_name; +# +# filename = argv[1]; +# +# dll = open(filename, O_RDONLY|O_BINARY); +# if (dll < 1) +# return 1; +# +# dll_name = filename; +# +# for (i=0; filename[i]; i++) +# if (filename[i] == '/' || filename[i] == '\\' || filename[i] == ':') +# dll_name = filename + i +1; +# +# pe_header_offset = pe_get32 (dll, 0x3c); +# opthdr_ofs = pe_header_offset + 4 + 20; +# num_entries = pe_get32 (dll, opthdr_ofs + 92); +# +# if (num_entries < 1) /* no exports */ +# return 1; +# +# export_rva = pe_get32 (dll, opthdr_ofs + 96); +# export_size = pe_get32 (dll, opthdr_ofs + 100); +# nsections = pe_get16 (dll, pe_header_offset + 4 +2); +# secptr = 
(pe_header_offset + 4 + 20 + +# pe_get16 (dll, pe_header_offset + 4 + 16)); +# +# expptr = 0; +# for (i = 0; i < nsections; i++) +# { +# char sname[8]; +# unsigned long secptr1 = secptr + 40 * i; +# unsigned long vaddr = pe_get32 (dll, secptr1 + 12); +# unsigned long vsize = pe_get32 (dll, secptr1 + 16); +# unsigned long fptr = pe_get32 (dll, secptr1 + 20); +# lseek(dll, secptr1, SEEK_SET); +# read(dll, sname, 8); +# if (vaddr <= export_rva && vaddr+vsize > export_rva) +# { +# expptr = fptr + (export_rva - vaddr); +# if (export_rva + export_size > vaddr + vsize) +# export_size = vsize - (export_rva - vaddr); +# break; +# } +# } +# +# expdata = (unsigned char*)malloc(export_size); +# lseek (dll, expptr, SEEK_SET); +# read (dll, expdata, export_size); +# erva = expdata - export_rva; +# +# nexp = pe_as32 (expdata+24); +# name_rvas = pe_as32 (expdata+32); +# +# printf ("EXPORTS\n"); +# for (i = 0; i> "${ofile}T" || (rm -f "${ofile}T"; exit 1) + + mv -f "${ofile}T" "$ofile" || \ + (rm -f "$ofile" && cp "${ofile}T" "$ofile" && rm -f "${ofile}T") + chmod +x "$ofile" +fi + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + +# Prevent multiple expansion + + +fi +# if libtool >= 1.5 +TAGCC= + + +# Select memory manager depending on user input. +# If no "-enable-maxmem", use jmemnobs +MEMORYMGR='jmemnobs.$(O)' +MAXMEM="no" +# Check whether --enable-maxmem or --disable-maxmem was given. +if test "${enable_maxmem+set}" = set; then + enableval="$enable_maxmem" + MAXMEM="$enableval" +fi + +# support --with-maxmem for backwards compatibility with IJG V5. +# Check whether --with-maxmem or --without-maxmem was given. 
+if test "${with_maxmem+set}" = set; then + withval="$with_maxmem" + MAXMEM="$withval" +fi + +if test "x$MAXMEM" = xyes; then + MAXMEM=1 +fi +if test "x$MAXMEM" != xno; then + if test -n "`echo $MAXMEM | sed 's/[0-9]//g'`"; then + { echo "configure: error: non-numeric argument to --enable-maxmem" 1>&2; exit 1; } + fi + DEFAULTMAXMEM=`expr $MAXMEM \* 1048576` +cat >> confdefs.h <&6 +echo "configure:6277: checking for 'tmpfile()'" >&5 +cat > conftest.$ac_ext < +int main() { + FILE * tfile = tmpfile(); +; return 0; } +EOF +if { (eval echo configure:6286: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + echo "$ac_t""yes" 1>&6 +MEMORYMGR='jmemansi.$(O)' +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + echo "$ac_t""no" 1>&6 +MEMORYMGR='jmemname.$(O)' +cat >> confdefs.h <<\EOF +#define NEED_SIGNAL_CATCHER +EOF + +echo $ac_n "checking for 'mktemp()'""... $ac_c" 1>&6 +echo "configure:6301: checking for 'mktemp()'" >&5 +cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:6310: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* echo "$ac_t""yes" 1>&6 else @@ -1644,11 +6325,354 @@ rm -f conftest* fi -# Extract the library version ID from jpeglib.h. -echo $ac_n "checking libjpeg version number""... $ac_c" 1>&6 -echo "configure:1650: checking libjpeg version number" >&5 -JPEG_LIB_VERSION=`sed -e '/^#define JPEG_LIB_VERSION/!d' -e 's/^[^0-9]*\([0-9][0-9]*\).*$/\1/' $srcdir/jpeglib.h` -echo "$ac_t""$JPEG_LIB_VERSION" 1>&6 + +echo $ac_n "checking to see if the host cpu type is i386 or compatible""... 
$ac_c" 1>&6 +echo "configure:6331: checking to see if the host cpu type is i386 or compatible" >&5 +case "$host_cpu" in + i*86 | x86 | ia32) + echo "$ac_t""yes" 1>&6 + ;; + x86_64 | amd64 | aa64) + echo "$ac_t""no (x86_64)" 1>&6 + { echo "configure: error: Currently, this version of JPEG library cannot be compiled as 64-bit code. sorry." 1>&2; exit 1; } + ;; + *) + echo "$ac_t""no ("$host_cpu")" 1>&6 + { echo "configure: error: This version of JPEG library is for i386 or compatible processors only." 1>&2; exit 1; } + ;; +esac + +if test -z "$NAFLAGS" ; then + echo $ac_n "checking for object file format of host system""... $ac_c" 1>&6 +echo "configure:6348: checking for object file format of host system" >&5 + case "$host_os" in + cygwin* | mingw* | pw32* | interix*) + objfmt='Win32-COFF' + ;; + msdosdjgpp* | go32*) + objfmt='COFF' + ;; + os2-emx*) # not tested + objfmt='MSOMF' # obj + ;; + linux*coff* | linux*oldld*) + objfmt='COFF' # ??? + ;; + linux*aout*) + objfmt='a.out' + ;; + linux*) + objfmt='ELF' + ;; + freebsd* | netbsd* | openbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + objfmt='BSD-a.out' + else + objfmt='ELF' + fi + ;; + solaris* | sunos* | sysv* | sco*) + objfmt='ELF' + ;; + darwin* | rhapsody* | nextstep* | openstep* | macos*) + objfmt='Mach-O' + ;; + *) + objfmt='ELF ?' + ;; + esac + echo "$ac_t""$objfmt" 1>&6 + if test "$objfmt" = 'ELF ?'; then + objfmt='ELF' + echo "configure: warning: unexpected host system. assumed that the format is $objfmt." 1>&2 + fi +else + objfmt='' +fi +echo $ac_n "checking for object file format specifier (NAFLAGS) ""... 
$ac_c" 1>&6 +echo "configure:6394: checking for object file format specifier (NAFLAGS) " >&5 +case "$objfmt" in + MSOMF) NAFLAGS='-fobj -DOBJ32';; + Win32-COFF) NAFLAGS='-fwin32 -DWIN32';; + COFF) NAFLAGS='-fcoff -DCOFF';; + a.out) NAFLAGS='-faout -DAOUT';; + BSD-a.out) NAFLAGS='-faoutb -DAOUT';; + ELF) NAFLAGS='-felf -DELF';; + RDF) NAFLAGS='-frdf -DRDF';; + Mach-O) NAFLAGS='-fmacho -DMACHO';; +esac +echo "$ac_t""$NAFLAGS" 1>&6 + + + +for ac_prog in nasm nasmw +do +# Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:6414: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_NASM'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$NASM"; then + ac_cv_prog_NASM="$NASM" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_NASM="$ac_prog" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +NASM="$ac_cv_prog_NASM" +if test -n "$NASM"; then + echo "$ac_t""$NASM" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +test -n "$NASM" && break +done + +test -z "$NASM" && { echo "configure: error: no nasm (Netwide Assembler) found in \$PATH" 1>&2; exit 1; } +if echo "$NASM" | grep yasm > /dev/null; then + echo "configure: warning: DON'T USE YASM! CURRENT VERSION (R0.4.0) IS BUGGY!" 1>&2 +fi + +echo $ac_n "checking whether the assembler ($NASM $NAFLAGS) works""... 
$ac_c" 1>&6 +echo "configure:6449: checking whether the assembler ($NASM $NAFLAGS) works" >&5 +cat > conftest.asm <&5; (eval $try_nasm) 2>&5; } && test -s conftest.o; then + echo "$ac_t""yes" 1>&6 +else + echo "configure: failed program was:" >&5 + cat conftest.asm >&5 + rm -rf conftest* + echo "$ac_t""no" 1>&6 + { echo "configure: error: installation or configuration problem: assembler cannot create object files." 1>&2; exit 1; } +fi +echo $ac_n "checking whether the linker accepts assembler output""... $ac_c" 1>&6 +echo "configure:6470: checking whether the linker accepts assembler output" >&5 +try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&5' +if { (eval echo configure:6472: \"$try_nasm\") 1>&5; (eval $try_nasm) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + echo "$ac_t""yes" 1>&6 +else + rm -rf conftest* + echo "$ac_t""no" 1>&6 + { echo "configure: error: configuration problem: maybe object file format mismatch." 1>&2; exit 1; } +fi + +echo $ac_n "checking whether the assembler supports line continuation character""... $ac_c" 1>&6 +echo "configure:6482: checking whether the assembler supports line continuation character" >&5 +cat > conftest.asm <<\EOF +%line 6484 "configure" +; The line continuation character '\' +; was introduced in nasm 0.98.25. + section .text + bits 32 + global _zero +_zero: xor \ + eax,eax + ret +EOF +try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm' +if { (eval echo configure:6495: \"$try_nasm\") 1>&5; (eval $try_nasm) 2>&5; } && test -s conftest.o; then + rm -rf conftest* + echo "$ac_t""yes" 1>&6 +else + echo "configure: failed program was:" >&5 + cat conftest.asm >&5 + rm -rf conftest* + echo "$ac_t""no" 1>&6 + { echo "configure: error: you have to use a more recent version of the assembler." 1>&2; exit 1; } +fi + + +echo $ac_n "checking SIMD instruction sets requested to use""... 
$ac_c" 1>&6 +echo "configure:6508: checking SIMD instruction sets requested to use" >&5 +simd_to_use="" + +# Check whether --enable-mmx or --disable-mmx was given. +if test "${enable_mmx+set}" = set; then + enableval="$enable_mmx" + if test "x$enableval" = xno; then + cat >> confdefs.h <<\EOF +#define JSIMD_MMX_NOT_SUPPORTED +EOF + +else + simd_to_use="$simd_to_use MMX" +fi +else + simd_to_use="$simd_to_use MMX" +fi + + +# Check whether --enable-3dnow or --disable-3dnow was given. +if test "${enable_3dnow+set}" = set; then + enableval="$enable_3dnow" + if test "x$enableval" = xno; then + cat >> confdefs.h <<\EOF +#define JSIMD_3DNOW_NOT_SUPPORTED +EOF + +else + simd_to_use="$simd_to_use 3DNow!" +fi +else + simd_to_use="$simd_to_use 3DNow!" +fi + + +# Check whether --enable-sse or --disable-sse was given. +if test "${enable_sse+set}" = set; then + enableval="$enable_sse" + if test "x$enableval" = xno; then + cat >> confdefs.h <<\EOF +#define JSIMD_SSE_NOT_SUPPORTED +EOF + +else + simd_to_use="$simd_to_use SSE" +fi +else + simd_to_use="$simd_to_use SSE" +fi + + +# Check whether --enable-sse2 or --disable-sse2 was given. +if test "${enable_sse2+set}" = set; then + enableval="$enable_sse2" + if test "x$enableval" = xno; then + cat >> confdefs.h <<\EOF +#define JSIMD_SSE2_NOT_SUPPORTED +EOF + +else + simd_to_use="$simd_to_use SSE2" +fi +else + simd_to_use="$simd_to_use SSE2" +fi + + +test -z "$simd_to_use" && simd_to_use="NONE" +echo "$ac_t""$simd_to_use" 1>&6 + +for simd_name in $simd_to_use; do +case "$simd_name" in + MMX) simd_instruction='psubw mm0,mm0';; + 3DNow!) simd_instruction='pfsub mm0,mm0';; + SSE) simd_instruction='subps xmm0,xmm0';; + SSE2) simd_instruction='subpd xmm0,xmm0';; + *) continue;; +esac +echo $ac_n "checking whether the assembler supports $simd_name instructions""... 
$ac_c" 1>&6 +echo "configure:6587: checking whether the assembler supports $simd_name instructions" >&5 +cat > conftest.asm <&5; (eval $try_nasm) 2>&5; } && test -s conftest.o; then + rm -rf conftest* + echo "$ac_t""yes" 1>&6 +else + echo "configure: failed program was:" >&5 + cat conftest.asm >&5 + rm -rf conftest* + echo "$ac_t""no" 1>&6 + { echo "configure: error: you have to use a more recent version of the assembler." 1>&2; exit 1; } +fi +done + +# Select OS-dependent SIMD instruction support checker. +# jsimdw32.$(O) (Win32) / jsimddjg.$(O) (DJGPP V.2) / jsimdgcc.$(O) (Unix/gcc) +if test "x$SIMDCHECKER" = x ; then + case "$host_os" in + cygwin* | mingw* | pw32* | interix*) + SIMDCHECKER='jsimdw32.$(O)' + ;; + msdosdjgpp* | go32*) + SIMDCHECKER='jsimddjg.$(O)' + ;; + os2-emx*) # not tested + SIMDCHECKER='jsimdgcc.$(O)' + ;; + *) + SIMDCHECKER='jsimdgcc.$(O)' + ;; + esac +fi + + +case "$host_os" in + cygwin* | mingw* | pw32* | os2-emx* | msdosdjgpp* | go32*) + cat >> confdefs.h <<\EOF +#define USE_SETMODE +EOF + + ;; +# _host_name_*) +# AC_DEFINE([USE_FDOPEN],) +# ;; +esac + +# This is for UNIX-like environments on Windows platform. +# Check whether --enable-uchar-boolean or --disable-uchar-boolean was given. +if test "${enable_uchar_boolean+set}" = set; then + enableval="$enable_uchar_boolean" + if test "x$enableval" != xno; then + cat >> confdefs.h <<\EOF +#define TYPEDEF_UCHAR_BOOLEAN +EOF + +fi +fi + + + +JPEG_LIB_VERSION="63:0:1" +confv_dirs="$srcdir $srcdir/.. $srcdir/../.." +config_ver= +for ac_dir in $confv_dirs; do + if test -r $ac_dir/config.ver; then + config_ver=$ac_dir/config.ver + break + fi +done +if test -z "$config_ver"; then + echo "configure: warning: cannot find config.ver in $confv_dirs" 1>&2 + echo "configure: warning: default version number $JPEG_LIB_VERSION is used" 1>&2 + echo $ac_n "checking libjpeg version number for libtool""... 
$ac_c" 1>&6 +echo "configure:6668: checking libjpeg version number for libtool" >&5 + echo "$ac_t""$JPEG_LIB_VERSION" 1>&6 +else + echo $ac_n "checking libjpeg version number for libtool""... $ac_c" 1>&6 +echo "configure:6672: checking libjpeg version number for libtool" >&5 + . $config_ver + echo "$ac_t""$JPEG_LIB_VERSION" 1>&6 + echo "configure: if you want to change the version number, modify $config_ver" 1>&2 +fi # Prepare to massage makefile.cfg correctly. @@ -1675,12 +6699,15 @@ else COM_LT="# " fi -if test "x$LTSHARED" != xno; then +if test "x$enable_shared" != xno; then FORCE_INSTALL_LIB="install-lib" + UNINSTALL_LIB="uninstall-lib" else FORCE_INSTALL_LIB="" + UNINSTALL_LIB="" fi + # Set up -I directives if test "x$srcdir" = x.; then INCLUDEFLAGS='-I$(srcdir)' @@ -1689,6 +6716,52 @@ else fi trap '' 1 2 15 +cat > confcache <<\EOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs. It is not useful on other systems. +# If it contains results you don't want to keep, you may remove or edit it. +# +# By default, configure uses ./config.cache as the cache file, +# creating it if it does not exist already. You can give configure +# the --cache-file=FILE option to use a different cache file; that is +# what configure does when it calls configure scripts in +# subdirectories, so they share the cache. +# Giving --cache-file=/dev/null disables caching, for debugging configure. +# config.status only pays attention to the cache file if you give it the +# --recheck option to rerun configure. +# +EOF +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+(set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote substitution + # turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + -e "s/'/'\\\\''/g" \ + -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' + ;; + esac >> confcache +if cmp -s $cache_file confcache; then + : +else + if test -w $cache_file; then + echo "updating cache $cache_file" + cat confcache > $cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 @@ -1732,7 +6805,7 @@ do echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "$CONFIG_STATUS generated by autoconf version 2.12" + echo "$CONFIG_STATUS generated by autoconf version 2.13" exit 0 ;; -help | --help | --hel | --he | --h) echo "\$ac_cs_usage"; exit 0 ;; @@ -1752,9 +6825,11 @@ sed 's/%@/@@/; s/@%/@@/; s/%g\$/@g/; /@g\$/s/[\\\\&%]/\\\\&/g; s/@@/%@/; s/@@/@%/; s/@g\$/%g/' > conftest.subs <<\\CEOF $ac_vpsub $extrasub +s%@SHELL@%$SHELL%g s%@CFLAGS@%$CFLAGS%g s%@CPPFLAGS@%$CPPFLAGS%g s%@CXXFLAGS@%$CXXFLAGS%g +s%@FFLAGS@%$FFLAGS%g s%@DEFS@%$DEFS%g s%@LDFLAGS@%$LDFLAGS%g s%@LIBS@%$LIBS%g @@ -1776,20 +6851,45 @@ s%@mandir@%$mandir%g s%@CC@%$CC%g s%@CPP@%$CPP%g s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g +s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g s%@INSTALL_DATA@%$INSTALL_DATA%g s%@RANLIB@%$RANLIB%g +s%@host@%$host%g +s%@host_alias@%$host_alias%g +s%@host_cpu@%$host_cpu%g +s%@host_vendor@%$host_vendor%g +s%@host_os@%$host_os%g +s%@EXEEXT@%$EXEEXT%g s%@LIBTOOL@%$LIBTOOL%g 
s%@O@%$O%g s%@A@%$A%g s%@LN@%$LN%g s%@INSTALL_LIB@%$INSTALL_LIB%g +s%@UNINSTALL@%$UNINSTALL%g +s%@build@%$build%g +s%@build_alias@%$build_alias%g +s%@build_cpu@%$build_cpu%g +s%@build_vendor@%$build_vendor%g +s%@build_os@%$build_os%g +s%@LN_S@%$LN_S%g +s%@OBJEXT@%$OBJEXT%g +s%@ECHO@%$ECHO%g +s%@STRIP@%$STRIP%g +s%@DLLTOOL@%$DLLTOOL%g +s%@AS@%$AS%g +s%@OBJDUMP@%$OBJDUMP%g +s%@TAGCC@%$TAGCC%g s%@MEMORYMGR@%$MEMORYMGR%g +s%@NAFLAGS@%$NAFLAGS%g +s%@NASM@%$NASM%g +s%@SIMDCHECKER@%$SIMDCHECKER%g s%@JPEG_LIB_VERSION@%$JPEG_LIB_VERSION%g s%@A2K_DEPS@%$A2K_DEPS%g s%@COM_A2K@%$COM_A2K%g s%@ANSI2KNRFLAGS@%$ANSI2KNRFLAGS%g s%@COM_LT@%$COM_LT%g s%@FORCE_INSTALL_LIB@%$FORCE_INSTALL_LIB%g +s%@UNINSTALL_LIB@%$UNINSTALL_LIB%g s%@INCLUDEFLAGS@%$INCLUDEFLAGS%g CEOF @@ -1952,6 +7052,7 @@ rm -f conftest.hdr # example, in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. cat >> conftest.vals <<\EOF +s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% EOF # Break up conftest.vals because some shells have a limit on diff --git a/configure.in b/configure.in new file mode 100644 index 0000000..06171f0 --- /dev/null +++ b/configure.in @@ -0,0 +1,634 @@ +dnl Process this file with autoconf to produce a configure script. 
+AC_INIT([jcmaster.c]) +AC_CONFIG_HEADER([jconfig.h:jconfig.cfg]) +dnl -------------------------------------------------------------------- +AC_PROG_CC +AC_PROG_CPP +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for function prototypes]) +AC_CACHE_VAL([ijg_cv_have_prototypes],[AC_TRY_COMPILE([ +int testfunction (int arg1, int * arg2); /* check prototypes */ +struct methods_struct { /* check method-pointer declarations */ + int (*error_exit) (char *msgtext); + int (*trace_message) (char *msgtext); + int (*another_method) (void); +}; +int testfunction (int arg1, int * arg2) /* check definitions */ +{ return arg2[arg1]; } +int test2function (void) /* check void arg list */ +{ return 0; } +],[ ],[ijg_cv_have_prototypes=yes],[ijg_cv_have_prototypes=no])]) +AC_MSG_RESULT([$ijg_cv_have_prototypes]) +if test $ijg_cv_have_prototypes = yes; then + AC_DEFINE([HAVE_PROTOTYPES],) +else + echo [Your compiler does not seem to know about function prototypes.] + echo [Perhaps it needs a special switch to enable ANSI C mode.] + echo [If so, we recommend running configure like this:] + echo [" ./configure CC='cc -switch'"] + echo [where -switch is the proper switch.] 
+fi +dnl -------------------------------------------------------------------- +AC_CHECK_HEADER([stddef.h],[AC_DEFINE([HAVE_STDDEF_H],)]) +AC_CHECK_HEADER([stdlib.h],[AC_DEFINE([HAVE_STDLIB_H],)]) +AC_CHECK_HEADER([string.h],[:],[AC_DEFINE([NEED_BSD_STRINGS],)]) +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for size_t]) +AC_TRY_COMPILE([ +#ifdef HAVE_STDDEF_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#include +#ifdef NEED_BSD_STRINGS +#include +#else +#include +#endif +typedef size_t my_size_t; +],[ my_size_t foovar; ], +[ijg_size_t_ok=yes], +[ijg_size_t_ok="not ANSI, perhaps it is in sys/types.h"]) +AC_MSG_RESULT([$ijg_size_t_ok]) +if test "$ijg_size_t_ok" != yes; then +AC_CHECK_HEADER([sys/types.h],[AC_DEFINE([NEED_SYS_TYPES_H],) +AC_EGREP_HEADER([size_t],[sys/types.h], +[ijg_size_t_ok="size_t is in sys/types.h"],[ijg_size_t_ok=no])], +[ijg_size_t_ok=no]) +AC_MSG_RESULT([$ijg_size_t_ok]) +if test "$ijg_size_t_ok" = no; then + echo [Type size_t is not defined in any of the usual places.] + echo [Try putting '"typedef unsigned int size_t;"' in jconfig.h.] 
+fi +fi +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for type unsigned char]) +AC_TRY_COMPILE(,[ unsigned char un_char; ],[AC_MSG_RESULT(yes) +AC_DEFINE([HAVE_UNSIGNED_CHAR],)],[AC_MSG_RESULT(no)]) +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for type unsigned short]) +AC_TRY_COMPILE(,[ unsigned short un_short; ],[AC_MSG_RESULT(yes) +AC_DEFINE([HAVE_UNSIGNED_SHORT],)],[AC_MSG_RESULT(no)]) +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for type void]) +AC_TRY_COMPILE([ +/* Caution: a C++ compiler will insist on valid prototypes */ +typedef void * void_ptr; /* check void * */ +#ifdef HAVE_PROTOTYPES /* check ptr to function returning void */ +typedef void (*void_func) (int a, int b); +#else +typedef void (*void_func) (); +#endif + +#ifdef HAVE_PROTOTYPES /* check void function result */ +void test3function (void_ptr arg1, void_func arg2) +#else +void test3function (arg1, arg2) + void_ptr arg1; + void_func arg2; +#endif +{ + char * locptr = (char *) arg1; /* check casting to and from void * */ + arg1 = (void *) locptr; + (*arg2) (1, 2); /* check call of fcn returning void */ +} +],[ ],[AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no) +AC_DEFINE([void],[char])]) + +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for working const]) +AC_CACHE_VAL([ac_cv_c_const],[AC_TRY_COMPILE(,[ +/* Ultrix mips cc rejects this. */ +typedef int charset[2]; const charset x; +/* SunOS 4.1.1 cc rejects this. */ +char const *const *ccp; +char **p; +/* NEC SVR4.0.2 mips cc rejects this. */ +struct point {int x, y;}; +static struct point const zero = {0,0}; +/* AIX XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in an arm + of an if-expression whose if-part is not a constant expression */ +const char *g = "string"; +ccp = &g + (g ? g-g : 0); +/* HPUX 7.0 cc rejects these. 
*/ +++ccp; +p = (char**) ccp; +ccp = (char const *const *) p; +{ /* SCO 3.2v4 cc rejects this. */ + char *t; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; +} +{ /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; +} +{ /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; +} +{ /* AIX XL C 1.02.0.0 rejects this saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ + struct s { int j; const int *ap[3]; }; + struct s *b; b->j = 5; +} +{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; +} +],[ac_cv_c_const=yes],[ac_cv_c_const=no])]) +AC_MSG_RESULT([$ac_cv_c_const]) +if test $ac_cv_c_const = no; then + AC_DEFINE([const],) +fi + +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for inline]) +ijg_cv_inline="" +AC_TRY_COMPILE(,[} __inline__ int foo() { return 0; } +int bar() { return foo();],[ijg_cv_inline="__inline__"], +[AC_TRY_COMPILE(,[} __inline int foo() { return 0; } +int bar() { return foo();],[ijg_cv_inline="__inline"], +[AC_TRY_COMPILE(,[} inline int foo() { return 0; } +int bar() { return foo();],[ijg_cv_inline="inline"],)])]) +AC_MSG_RESULT([$ijg_cv_inline]) +AC_DEFINE_UNQUOTED([INLINE],[$ijg_cv_inline]) +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for broken incomplete types]) +AC_TRY_COMPILE([ typedef struct undefined_structure * undef_struct_ptr; ], +,[AC_MSG_RESULT(ok)],[AC_MSG_RESULT(broken) +AC_DEFINE([INCOMPLETE_TYPES_BROKEN],)]) +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([for short external names]) +AC_TRY_LINK([ +int possibly_duplicate_function () { return 0; } +int possibly_dupli_function () { return 1; } +],[ ],[AC_MSG_RESULT(ok)],[AC_MSG_RESULT(short) +AC_DEFINE([NEED_SHORT_EXTERNAL_NAMES],)]) +dnl 
-------------------------------------------------------------------- +AC_MSG_CHECKING([to see if char is signed]) +AC_TRY_RUN([ +#ifdef HAVE_PROTOTYPES +int is_char_signed (int arg) +#else +int is_char_signed (arg) + int arg; +#endif +{ + if (arg == 189) { /* expected result for unsigned char */ + return 0; /* type char is unsigned */ + } + else if (arg != -67) { /* expected result for signed char */ + printf("Hmm, it seems 'char' is not eight bits wide on your machine.\n"); + printf("I fear the JPEG software will not work at all.\n\n"); + } + return 1; /* assume char is signed otherwise */ +} +char signed_char_check = (char) (-67); +main() { + exit(is_char_signed((int) signed_char_check)); +}],[AC_MSG_RESULT(no) +AC_DEFINE([CHAR_IS_UNSIGNED],)],[AC_MSG_RESULT(yes)], +[echo Assuming that char is signed on target machine. +echo If it is unsigned, this will be a little bit inefficient. +]) +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([to see if right shift is signed]) +AC_TRY_RUN([ +#ifdef HAVE_PROTOTYPES +int is_shifting_signed (long arg) +#else +int is_shifting_signed (arg) + long arg; +#endif +/* See whether right-shift on a long is signed or not. */ +{ + long res = arg >> 4; + + if (res == -0x7F7E80CL) { /* expected result for signed shift */ + return 1; /* right shift is signed */ + } + /* see if unsigned-shift hack will fix it. */ + /* we can't just test exact value since it depends on width of long... */ + res |= (~0L) << (32-4); + if (res == -0x7F7E80CL) { /* expected result now? 
*/ + return 0; /* right shift is unsigned */ + } + printf("Right shift isn't acting as I expect it to.\n"); + printf("I fear the JPEG software will not work at all.\n\n"); + return 0; /* try it with unsigned anyway */ +} +main() { + exit(is_shifting_signed(-0x7F7E80B1L)); +}],[AC_MSG_RESULT(no) +AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED],)],[AC_MSG_RESULT(yes)], +[AC_MSG_RESULT([Assuming that right shift is signed on target machine.])]) +dnl -------------------------------------------------------------------- +AC_MSG_CHECKING([to see if fopen accepts b spec]) +AC_TRY_RUN([ +#include +main() { + if (fopen("conftestdata", "wb") != NULL) + exit(0); + exit(1); +}],[AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no) +AC_DEFINE([DONT_USE_B_MODE],)],[AC_MSG_RESULT([Assuming that it does.])]) +dnl -------------------------------------------------------------------- +AC_PROG_INSTALL +AC_PROG_RANLIB +dnl -------------------------------------------------------------------- + +AC_CANONICAL_HOST +AC_EXEEXT + +# Decide whether to use libtool, +# and if so whether to build shared, static, or both flavors of library. +AC_DISABLE_SHARED +AC_DISABLE_STATIC +if test "x$enable_shared" != xno -o "x$enable_static" != xno; then + USELIBTOOL="yes" +# LIBTOOL="./libtool" + O="lo" + A="la" + LN='$(LIBTOOL) --mode=link $(CC)' + INSTALL_LIB='$(LIBTOOL) --mode=install ${INSTALL}' + INSTALL_PROGRAM="\$(LIBTOOL) --mode=install $INSTALL_PROGRAM" + UNINSTALL='$(LIBTOOL) --mode=uninstall $(RM)' +else + USELIBTOOL="no" + LIBTOOL="" + O="o" + A="a" + LN='$(CC)' + INSTALL_LIB="$INSTALL_DATA" + UNINSTALL='$(RM)' +fi +AC_SUBST([LIBTOOL]) +AC_SUBST([O]) +AC_SUBST([A]) +AC_SUBST([LN]) +AC_SUBST([INSTALL_LIB]) +AC_SUBST([UNINSTALL]) + +# Configure libtool if needed. 
+if test $USELIBTOOL = yes; then + AC_LIBTOOL_DLOPEN + AC_LIBTOOL_WIN32_DLL + AC_PROG_LIBTOOL +fi +# if libtool >= 1.5 +TAGCC=ifdef([AC_LIBTOOL_GCJ],[--tag=CC]) +AC_SUBST([TAGCC]) + +dnl -------------------------------------------------------------------- +# Select memory manager depending on user input. +# If no "-enable-maxmem", use jmemnobs +MEMORYMGR='jmemnobs.$(O)' +MAXMEM="no" +AC_ARG_ENABLE([maxmem], +[ --enable-maxmem[=N] enable use of temp files, set max mem usage to N MB], +[MAXMEM="$enableval"]) +# support --with-maxmem for backwards compatibility with IJG V5. +AC_ARG_WITH([maxmem],,[MAXMEM="$withval"]) +if test "x$MAXMEM" = xyes; then + MAXMEM=1 +fi +if test "x$MAXMEM" != xno; then + if test -n "`echo $MAXMEM | sed 's/[[0-9]]//g'`"; then + AC_MSG_ERROR([non-numeric argument to --enable-maxmem]) + fi + DEFAULTMAXMEM=`expr $MAXMEM \* 1048576` +AC_DEFINE_UNQUOTED([DEFAULT_MAX_MEM],[${DEFAULTMAXMEM}]) +AC_MSG_CHECKING([for 'tmpfile()']) +AC_TRY_LINK([#include ],[ FILE * tfile = tmpfile(); ], +[AC_MSG_RESULT(yes) +MEMORYMGR='jmemansi.$(O)'], +[AC_MSG_RESULT(no) +MEMORYMGR='jmemname.$(O)' +AC_DEFINE([NEED_SIGNAL_CATCHER],) +AC_MSG_CHECKING([for 'mktemp()']) +AC_TRY_LINK(,[ char fname[80]; mktemp(fname); ], +[AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no) +AC_DEFINE([NO_MKTEMP],)])]) +fi +AC_SUBST([MEMORYMGR]) + +dnl ==================================================================== + +AC_MSG_CHECKING([to see if the host cpu type is i386 or compatible]) +case "$host_cpu" in + i*86 | x86 | ia32) + AC_MSG_RESULT(yes) + ;; + x86_64 | amd64 | aa64) + AC_MSG_RESULT([no (x86_64)]) + AC_MSG_ERROR([Currently, this version of JPEG library cannot be compiled as 64-bit code. 
sorry.]) + ;; + *) + AC_MSG_RESULT([no ("$host_cpu")]) + AC_MSG_ERROR([This version of JPEG library is for i386 or compatible processors only.]) + ;; +esac + +if test -z "$NAFLAGS" ; then + AC_MSG_CHECKING([for object file format of host system]) + case "$host_os" in + cygwin* | mingw* | pw32* | interix*) + objfmt='Win32-COFF' + ;; + msdosdjgpp* | go32*) + objfmt='COFF' + ;; + os2-emx*) # not tested + objfmt='MSOMF' # obj + ;; + linux*coff* | linux*oldld*) + objfmt='COFF' # ??? + ;; + linux*aout*) + objfmt='a.out' + ;; + linux*) + objfmt='ELF' + ;; + freebsd* | netbsd* | openbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + objfmt='BSD-a.out' + else + objfmt='ELF' + fi + ;; + solaris* | sunos* | sysv* | sco*) + objfmt='ELF' + ;; + darwin* | rhapsody* | nextstep* | openstep* | macos*) + objfmt='Mach-O' + ;; + *) + objfmt='ELF ?' + ;; + esac + AC_MSG_RESULT([$objfmt]) + if test "$objfmt" = 'ELF ?'; then + objfmt='ELF' + AC_MSG_WARN([unexpected host system. assumed that the format is $objfmt.]) + fi +else + objfmt='' +fi +AC_MSG_CHECKING([for object file format specifier (NAFLAGS) ]) +case "$objfmt" in + MSOMF) NAFLAGS='-fobj -DOBJ32';; + Win32-COFF) NAFLAGS='-fwin32 -DWIN32';; + COFF) NAFLAGS='-fcoff -DCOFF';; + a.out) NAFLAGS='-faout -DAOUT';; + BSD-a.out) NAFLAGS='-faoutb -DAOUT';; + ELF) NAFLAGS='-felf -DELF';; + RDF) NAFLAGS='-frdf -DRDF';; + Mach-O) NAFLAGS='-fmacho -DMACHO';; +esac +AC_MSG_RESULT([$NAFLAGS]) +AC_SUBST([NAFLAGS]) + +dnl -------------------------------------------------------------------- + +AC_CHECK_PROGS(NASM, [nasm nasmw]) +test -z "$NASM" && AC_MSG_ERROR([no nasm (Netwide Assembler) found in \$PATH]) +if echo "$NASM" | grep yasm > /dev/null; then + AC_MSG_WARN([DON'T USE YASM! 
CURRENT VERSION (R0.4.0) IS BUGGY!]) +fi + +AC_MSG_CHECKING([whether the assembler ($NASM $NAFLAGS) works]) +cat > conftest.asm <<EOF +[%line __oline__ "configure" + section .text + bits 32 + global _main,main +_main: +main: xor eax,eax + ret +]EOF +try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm' +if AC_TRY_EVAL(try_nasm) && test -s conftest.o; then + AC_MSG_RESULT(yes) +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.asm >&AC_FD_CC + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([installation or configuration problem: assembler cannot create object files.]) +fi +AC_MSG_CHECKING([whether the linker accepts assembler output]) +try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&AC_FD_CC' +if AC_TRY_EVAL(try_nasm) && test -s conftest${ac_exeext}; then + rm -rf conftest* + AC_MSG_RESULT(yes) +else + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([configuration problem: maybe object file format mismatch.]) +fi + +AC_MSG_CHECKING([whether the assembler supports line continuation character]) +cat > conftest.asm <<\EOF +[%line __oline__ "configure" +; The line continuation character '\' +; was introduced in nasm 0.98.25. + section .text + bits 32 + global _zero +_zero: xor \ + eax,eax + ret +]EOF +try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm' +if AC_TRY_EVAL(try_nasm) && test -s conftest.o; then + rm -rf conftest* + AC_MSG_RESULT(yes) +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.asm >&AC_FD_CC + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([you have to use a more recent version of the assembler.]) +fi + +dnl -------------------------------------------------------------------- + +AC_MSG_CHECKING([SIMD instruction sets requested to use]) +simd_to_use="" + +AC_ARG_ENABLE(mmx, +[ --disable-mmx do not use MMX instruction set], +[if test "x$enableval" = xno; then + AC_DEFINE([JSIMD_MMX_NOT_SUPPORTED],) +else + simd_to_use="$simd_to_use MMX" +fi], [simd_to_use="$simd_to_use MMX"]) + +AC_ARG_ENABLE(3dnow, +[ --disable-3dnow do not use 3DNow! instruction set], +[if test "x$enableval" = xno; then + AC_DEFINE([JSIMD_3DNOW_NOT_SUPPORTED],) +else + simd_to_use="$simd_to_use 3DNow!" 
+fi], [simd_to_use="$simd_to_use 3DNow!"]) + +AC_ARG_ENABLE(sse, +[ --disable-sse do not use SSE instruction set], +[if test "x$enableval" = xno; then + AC_DEFINE([JSIMD_SSE_NOT_SUPPORTED],) +else + simd_to_use="$simd_to_use SSE" +fi], [simd_to_use="$simd_to_use SSE"]) + +AC_ARG_ENABLE(sse2, +[ --disable-sse2 do not use SSE2 instruction set], +[if test "x$enableval" = xno; then + AC_DEFINE([JSIMD_SSE2_NOT_SUPPORTED],) +else + simd_to_use="$simd_to_use SSE2" +fi], [simd_to_use="$simd_to_use SSE2"]) + +test -z "$simd_to_use" && simd_to_use="NONE" +AC_MSG_RESULT([$simd_to_use]) + +for simd_name in $simd_to_use; do +case "$simd_name" in + MMX) simd_instruction='psubw mm0,mm0';; + 3DNow!) simd_instruction='pfsub mm0,mm0';; + SSE) simd_instruction='subps xmm0,xmm0';; + SSE2) simd_instruction='subpd xmm0,xmm0';; + *) continue;; +esac +AC_MSG_CHECKING([whether the assembler supports $simd_name instructions]) +cat > conftest.asm <<EOF +[%line __oline__ "configure" + section .text + bits 32 + global _simd +_simd: $simd_instruction + ret +]EOF +try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm' +if AC_TRY_EVAL(try_nasm) && test -s conftest.o; then + rm -rf conftest* + AC_MSG_RESULT(yes) +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.asm >&AC_FD_CC + rm -rf conftest* + AC_MSG_RESULT(no) + AC_MSG_ERROR([you have to use a more recent version of the assembler.]) +fi +done + +dnl -------------------------------------------------------------------- +# Select OS-dependent SIMD instruction support checker. +# jsimdw32.$(O) (Win32) / jsimddjg.$(O) (DJGPP V.2) / jsimdgcc.$(O) (Unix/gcc) +if test "x$SIMDCHECKER" = x ; then + case "$host_os" in + cygwin* | mingw* | pw32* | interix*) + SIMDCHECKER='jsimdw32.$(O)' + ;; + msdosdjgpp* | go32*) + SIMDCHECKER='jsimddjg.$(O)' + ;; + os2-emx*) # not tested + SIMDCHECKER='jsimdgcc.$(O)' + ;; + *) + SIMDCHECKER='jsimdgcc.$(O)' + ;; + esac +fi +AC_SUBST([SIMDCHECKER]) + +case "$host_os" in + cygwin* | mingw* | pw32* | os2-emx* | msdosdjgpp* | go32*) + AC_DEFINE([USE_SETMODE],) + ;; +# _host_name_*) +# AC_DEFINE([USE_FDOPEN],) +# ;; +esac + +# This is for UNIX-like environments on Windows platform. 
+AC_ARG_ENABLE(uchar-boolean, +[ --enable-uchar-boolean define type \"boolean\" as unsigned char (for Windows)], +[if test "x$enableval" != xno; then + AC_DEFINE([TYPEDEF_UCHAR_BOOLEAN],) +fi]) + +dnl -------------------------------------------------------------------- + +JPEG_LIB_VERSION="63:0:1" +confv_dirs="$srcdir $srcdir/.. $srcdir/../.." +config_ver= +for ac_dir in $confv_dirs; do + if test -r $ac_dir/config.ver; then + config_ver=$ac_dir/config.ver + break + fi +done +if test -z "$config_ver"; then + AC_MSG_WARN([cannot find config.ver in $confv_dirs]) + AC_MSG_WARN([default version number $JPEG_LIB_VERSION is used]) + AC_MSG_CHECKING([libjpeg version number for libtool]) + AC_MSG_RESULT([$JPEG_LIB_VERSION]) +else + AC_MSG_CHECKING([libjpeg version number for libtool]) + . $config_ver + AC_MSG_RESULT([$JPEG_LIB_VERSION]) + echo "configure: if you want to change the version number, modify $config_ver" 1>&2 +fi +AC_SUBST([JPEG_LIB_VERSION]) + +dnl -------------------------------------------------------------------- +# Prepare to massage makefile.cfg correctly. +if test $ijg_cv_have_prototypes = yes; then + A2K_DEPS="" + COM_A2K="# " +else + A2K_DEPS="ansi2knr" + COM_A2K="" +fi +AC_SUBST([A2K_DEPS]) +AC_SUBST([COM_A2K]) +# ansi2knr needs -DBSD if string.h is missing +if test $ac_cv_header_string_h = no; then + ANSI2KNRFLAGS="-DBSD" +else + ANSI2KNRFLAGS="" +fi +AC_SUBST([ANSI2KNRFLAGS]) +# Substitutions to enable or disable libtool-related stuff +if test $USELIBTOOL = yes -a $ijg_cv_have_prototypes = yes; then + COM_LT="" +else + COM_LT="# " +fi +AC_SUBST([COM_LT]) +if test "x$enable_shared" != xno; then + FORCE_INSTALL_LIB="install-lib" + UNINSTALL_LIB="uninstall-lib" +else + FORCE_INSTALL_LIB="" + UNINSTALL_LIB="" +fi +AC_SUBST([FORCE_INSTALL_LIB]) +AC_SUBST([UNINSTALL_LIB]) +# Set up -I directives +if test "x$srcdir" = x.; then + INCLUDEFLAGS='-I$(srcdir)' +else + INCLUDEFLAGS='-I. 
-I$(srcdir)' +fi +AC_SUBST([INCLUDEFLAGS]) +dnl -------------------------------------------------------------------- +AC_OUTPUT([Makefile:makefile.cfg]) diff --git a/djpeg.c b/djpeg.c index e099e90..a1ec059 100644 --- a/djpeg.c +++ b/djpeg.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : August 23, 2005 + * --------------------------------------------------------------------- + * * This file contains a command-line user interface for the JPEG decompressor. * It should work on any system with Unix- or MS-DOS-style command lines. * @@ -158,6 +165,22 @@ usage (void) } +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +LOCAL(void) +print_simd_info (FILE * file, char * labelstr, unsigned int simd) +{ + fprintf(file, "%s%s%s%s%s%s\n", labelstr, + simd & JSIMD_MMX ? " MMX" : "", + simd & JSIMD_3DNOW ? " 3DNow!" : "", + simd & JSIMD_SSE ? " SSE" : "", + simd & JSIMD_SSE2 ? " SSE2" : "", + simd == JSIMD_NONE ? 
" NONE" : ""); +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ + + LOCAL(int) parse_switches (j_decompress_ptr cinfo, int argc, char **argv, int last_file_arg_seen, boolean for_real) @@ -208,6 +231,19 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, cinfo->desired_number_of_colors = val; cinfo->quantize_colors = TRUE; +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED + } else if (keymatch(arg, "nosimd" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_ALL); + } else if (keymatch(arg, "nommx" , 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_MMX); + } else if (keymatch(arg, "no3dnow", 3)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_3DNOW); + } else if (keymatch(arg, "nosse" , 4)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE); + } else if (keymatch(arg, "nosse2" , 6)) { + jpeg_simd_mask((j_common_ptr) cinfo, JSIMD_NONE, JSIMD_SSE2); +#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */ + } else if (keymatch(arg, "dct", 2)) { /* Select IDCT algorithm. */ if (++argn >= argc) /* advance to next argument */ @@ -242,6 +278,38 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, if (! 
printed_version) { fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n", JVERSION, JCOPYRIGHT); + fprintf(stderr, + "\nx86 SIMD extension for IJG JPEG library, version %s\n\n", + JPEG_SIMDEXT_VER_STR); +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + print_simd_info(stderr, "SIMD instructions supported by the system :", + jpeg_simd_support(NULL)); + + fprintf(stderr, "\n === SIMD Operation Modes ===\n"); +#ifdef DCT_ISLOW_SUPPORTED + print_simd_info(stderr, "Accurate integer DCT (-dct int) :", + jpeg_simd_inverse_dct(cinfo, JDCT_ISLOW)); +#endif +#ifdef DCT_IFAST_SUPPORTED + print_simd_info(stderr, "Fast integer DCT (-dct fast) :", + jpeg_simd_inverse_dct(cinfo, JDCT_IFAST)); +#endif +#ifdef DCT_FLOAT_SUPPORTED + print_simd_info(stderr, "Floating-point DCT (-dct float) :", + jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT)); +#endif +#ifdef IDCT_SCALING_SUPPORTED + print_simd_info(stderr, "Reduced-size DCT (-scale M/N) :", + jpeg_simd_inverse_dct(cinfo, JDCT_FLOAT+1)); +#endif + print_simd_info(stderr, "High-quality upsampling (default) :", + jpeg_simd_upsampler(cinfo, TRUE)); + print_simd_info(stderr, "Low-quality upsampling (-nosmooth) :", + jpeg_simd_upsampler(cinfo, FALSE)); + print_simd_info(stderr, "Colorspace conversion (YCbCr->RGB) :", + jpeg_simd_color_deconverter(cinfo)); + fprintf(stderr, "\n"); +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ printed_version = TRUE; } cinfo->err->trace_level++; diff --git a/install-sh b/install-sh index e843669..4d4a951 100755 --- a/install-sh +++ b/install-sh @@ -1,19 +1,38 @@ #!/bin/sh -# # install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). + +scriptversion=2005-05-14.22 + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. 
+# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # -# Copyright 1991 by the Massachusetts Institute of Technology +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. 
+# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # `make' implicit rules from creating a file called install from it @@ -23,13 +42,11 @@ # from scratch. It can only install one file at a time, a restriction # shared with many OS's install programs. - # set DOITPROG to echo to test this script # Don't use :- since 4.3BSD and earlier shells don't like it. doit="${DOITPROG-}" - # put in absolute paths if you don't have them in your path; or use env. vars. mvprog="${MVPROG-mv}" @@ -41,210 +58,266 @@ stripprog="${STRIPPROG-strip}" rmprog="${RMPROG-rm}" mkdirprog="${MKDIRPROG-mkdir}" -transformbasename="" -transform_arg="" -instcmd="$mvprog" chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" +chowncmd= +chgrpcmd= +stripcmd= rmcmd="$rmprog -f" mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - else - 
instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. - - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" +src= +dst= +dir_arg= +dstarg= +no_target_directory= + +usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: +-c (ignored) +-d create directories instead of installing files. +-g GROUP $chgrpprog installed files to GROUP. +-m MODE $chmodprog installed files to MODE. +-o USER $chownprog installed files to USER. +-s $stripprog installed files. +-t DIRECTORY install into DIRECTORY. +-T report an error if DSTFILE is a directory. +--help display this help and exit. +--version display version info and exit. 
+ +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG +" + +while test -n "$1"; do + case $1 in + -c) shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + --help) echo "$usage"; exit $?;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -s) stripcmd=$stripprog + shift + continue;; + + -t) dstarg=$2 shift + shift + continue;; - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" + -T) no_target_directory=true + shift + continue;; + + --version) echo "$0 $scriptversion"; exit $?;; + + *) # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + test -n "$dir_arg$dstarg" && break + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dstarg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dstarg" + shift # fnord + fi + shift # arg + dstarg=$arg + done + break;; + esac done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. 
- - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. - - dsttmp=$dstdir/#inst.$$# -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile +if test -z "$1"; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi -fi && +for src +do + # Protect names starting with `-'. + case $src in + -*) src=./$src ;; + esac + + if test -n "$dir_arg"; then + dst=$src + src= + + if test -d "$dst"; then + mkdircmd=: + chmodcmd= + else + mkdircmd=$mkdirprog + fi + else + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." 
>&2 + exit 1 + fi + + if test -z "$dstarg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dstarg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst ;; + esac + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dstarg: Is a directory" >&2 + exit 1 + fi + dst=$dst/`basename "$src"` + fi + fi + + # This sed command emulates the dirname command. + dstdir=`echo "$dst" | sed -e 's,/*$,,;s,[^/]*$,,;s,/*$,,;s,^$,.,'` + + # Make sure that the destination directory exists. + + # Skip lots of stat calls in the usual case. + if test ! -d "$dstdir"; then + defaultIFS=' + ' + IFS="${IFS-$defaultIFS}" + + oIFS=$IFS + # Some sh's can't handle IFS=/ for some reason. + IFS='%' + set x `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'` + shift + IFS=$oIFS + + pathcomp= + + while test $# -ne 0 ; do + pathcomp=$pathcomp$1 + shift + if test ! -d "$pathcomp"; then + $mkdirprog "$pathcomp" + # mkdir can fail with a `File exist' error in case several + # install-sh are creating the directory concurrently. This + # is OK. + test -d "$pathcomp" || exit + fi + pathcomp=$pathcomp/ + done + fi + + if test -n "$dir_arg"; then + $doit $mkdircmd "$dst" \ + && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; } + + else + dstfile=`basename "$dst"` + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + trap '(exit $?); exit' 1 2 13 15 + + # Copy the file name to the temp name. + $doit $cpprog "$src" "$dsttmp" && + + # and set any options; do chmod last to preserve setuid bits. 
+ # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } && + + # Now rename the file to the real destination. + { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \ + || { + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + if test -f "$dstdir/$dstfile"; then + $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \ + || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \ + || { + echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 + (exit 1); exit 1 + } + else + : + fi + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" + } + } + fi || { (exit 1); exit 1; } +done -exit 0 +# The final little trick to "correctly" pass the exit status to the exit trap. 
+{ + (exit 0); exit 0 +} + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/jccolmmx.asm b/jccolmmx.asm new file mode 100644 index 0000000..2e2fca6 --- /dev/null +++ b/jccolmmx.asm @@ -0,0 +1,513 @@ +; +; jccolmmx.asm - colorspace conversion (MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jcolsamp.inc" + +%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +%ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_mmx) + +EXTN(jconst_rgb_ycc_convert_mmx): + +PW_F0299_F0337 times 2 dw F_0_299, F_0_337 +PW_F0114_F0250 times 2 dw F_0_114, F_0_250 +PW_MF016_MF033 times 2 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 2 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) 
+ + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jpeg_rgb_ycc_convert_mmx (j_compress_ptr cinfo, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; + +%define cinfo(b) (b)+8 ; j_compress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_rgb_ycc_convert_mmx) + +EXTN(jpeg_rgb_ycc_convert_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, POINTER [cinfo(eax)] + mov ecx, JDIMENSION [jcstruct_image_width(ecx)] ; num_cols + test ecx,ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; 
inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_MMWORD + jae short .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + xor eax,eax + mov al, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + xor edx,edx + mov dx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd mmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd mmG, DWORD [esi+ecx] + psllq mmA, DWORD_BIT + por mmA,mmG +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + movq mmG,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + mov ecx, SIZEOF_MMWORD + jmp short .rgb_ycc_cnv +.column_ld16: + test cl, 2*SIZEOF_MMWORD + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmF,mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 01 11 21 02 12) + ; mmG=(22 03 13 23 04 14 24 05) + ; mmF=(15 25 06 16 26 07 17 27) + + movq mmD,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) + psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) + + punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05) + psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) + + punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16) + punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27) + + movq mmE,mmA + psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) + psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) + + punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 
12 14 16) + psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) + + punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07) + punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27) + + pxor mmH,mmH + + movq mmC,mmA + punpcklbw mmA,mmH ; mmA=(00 02 04 06) + punpckhbw mmC,mmH ; mmC=(10 12 14 16) + + movq mmB,mmE + punpcklbw mmE,mmH ; mmE=(20 22 24 26) + punpckhbw mmB,mmH ; mmB=(01 03 05 07) + + movq mmF,mmD + punpcklbw mmD,mmH ; mmD=(11 13 15 17) + punpckhbw mmF,mmH ; mmF=(21 23 25 27) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_MMWORD/8 + jz short .column_ld2 + sub ecx, byte SIZEOF_MMWORD/8 + movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_MMWORD/4 + jz short .column_ld4 + sub ecx, byte SIZEOF_MMWORD/4 + movq mmF,mmA + movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld4: + test cl, SIZEOF_MMWORD/2 + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmD,mmA + movq mmC,mmF + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] + movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 30 01 11 21 31) + ; mmF=(02 12 22 32 03 13 23 33) + ; mmD=(04 14 24 34 05 15 25 35) + ; mmC=(06 16 26 36 07 17 27 37) + + movq mmB,mmA + punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32) + punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33) + + movq mmG,mmD + punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36) + punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37) + + movq mmE,mmA + punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) + punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36) + + movq mmH,mmB + punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17) + punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37) + + pxor mmF,mmF + + movq mmC,mmA + punpcklbw mmA,mmF ; mmA=(00 02 04 06) + punpckhbw mmC,mmF ; mmC=(10 12 14 16) + + movq 
mmD,mmB + punpcklbw mmB,mmF ; mmB=(01 03 05 07) + punpckhbw mmD,mmF ; mmD=(11 13 15 17) + + movq mmG,mmE + punpcklbw mmE,mmF ; mmE=(20 22 24 26) + punpckhbw mmG,mmF ; mmG=(30 32 34 36) + + punpcklbw mmF,mmH + punpckhbw mmH,mmH + psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27) + psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE + ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movq MMWORD [wk(0)], mm0 ; wk(0)=RE + movq MMWORD [wk(1)], mm1 ; wk(1)=RO + movq MMWORD [wk(2)], mm4 ; wk(2)=BE + movq MMWORD [wk(3)], mm5 ; wk(3)=BO + + movq mm6,mm1 + punpcklwd mm1,mm3 + punpckhwd mm6,mm3 + movq mm7,mm1 + movq mm4,mm6 + pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor mm1,mm1 + pxor mm6,mm6 + punpcklwd mm1,mm5 ; mm1=BOL + punpckhwd mm6,mm5 ; mm6=BOH + psrld mm1,1 ; mm1=BOL*FIX(0.500) + psrld mm6,1 ; mm6=BOH*FIX(0.500) + + movq mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ] + + paddd mm7,mm1 + paddd mm4,mm6 + paddd mm7,mm5 + paddd mm4,mm5 + psrld mm7,SCALEBITS ; mm7=CbOL + psrld mm4,SCALEBITS ; mm4=CbOH + packssdw mm7,mm4 ; mm7=CbO + + 
movq mm1, MMWORD [wk(2)] ; mm1=BE + + movq mm6,mm0 + punpcklwd mm0,mm2 + punpckhwd mm6,mm2 + movq mm5,mm0 + movq mm4,mm6 + pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor mm0,mm0 + pxor mm6,mm6 + punpcklwd mm0,mm1 ; mm0=BEL + punpckhwd mm6,mm1 ; mm6=BEH + psrld mm0,1 ; mm0=BEL*FIX(0.500) + psrld mm6,1 ; mm6=BEH*FIX(0.500) + + movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm5,mm0 + paddd mm4,mm6 + paddd mm5,mm1 + paddd mm4,mm1 + psrld mm5,SCALEBITS ; mm5=CbEL + psrld mm4,SCALEBITS ; mm4=CbEH + packssdw mm5,mm4 ; mm5=CbE + + psllw mm7,BYTE_BIT + por mm5,mm7 ; mm5=Cb + movq MMWORD [ebx], mm5 ; Save Cb + + movq mm0, MMWORD [wk(3)] ; mm0=BO + movq mm6, MMWORD [wk(2)] ; mm6=BE + movq mm1, MMWORD [wk(1)] ; mm1=RO + + movq mm4,mm0 + punpcklwd mm0,mm3 + punpckhwd mm4,mm3 + movq mm7,mm0 + movq mm5,mm4 + pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + + paddd mm0, MMWORD [wk(4)] + paddd mm4, MMWORD [wk(5)] + paddd mm0,mm3 + paddd mm4,mm3 + psrld mm0,SCALEBITS ; mm0=YOL + psrld mm4,SCALEBITS ; mm4=YOH + packssdw mm0,mm4 ; mm0=YO + + pxor mm3,mm3 + pxor mm4,mm4 + punpcklwd mm3,mm1 ; mm3=ROL + punpckhwd mm4,mm1 ; mm4=ROH + psrld mm3,1 ; mm3=ROL*FIX(0.500) + psrld mm4,1 ; mm4=ROH*FIX(0.500) + + movq 
mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm7,mm3 + paddd mm5,mm4 + paddd mm7,mm1 + paddd mm5,mm1 + psrld mm7,SCALEBITS ; mm7=CrOL + psrld mm5,SCALEBITS ; mm5=CrOH + packssdw mm7,mm5 ; mm7=CrO + + movq mm3, MMWORD [wk(0)] ; mm3=RE + + movq mm4,mm6 + punpcklwd mm6,mm2 + punpckhwd mm4,mm2 + movq mm1,mm6 + movq mm5,mm4 + pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + + paddd mm6, MMWORD [wk(6)] + paddd mm4, MMWORD [wk(7)] + paddd mm6,mm2 + paddd mm4,mm2 + psrld mm6,SCALEBITS ; mm6=YEL + psrld mm4,SCALEBITS ; mm4=YEH + packssdw mm6,mm4 ; mm6=YE + + psllw mm0,BYTE_BIT + por mm6,mm0 ; mm6=Y + movq MMWORD [edi], mm6 ; Save Y + + pxor mm2,mm2 + pxor mm4,mm4 + punpcklwd mm2,mm3 ; mm2=REL + punpckhwd mm4,mm3 ; mm4=REH + psrld mm2,1 ; mm2=REL*FIX(0.500) + psrld mm4,1 ; mm4=REH*FIX(0.500) + + movq mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ] + + paddd mm1,mm2 + paddd mm5,mm4 + paddd mm1,mm0 + paddd mm5,mm0 + psrld mm1,SCALEBITS ; mm1=CrEL + psrld mm5,SCALEBITS ; mm5=CrEH + packssdw mm1,mm5 ; mm1=CrE + + psllw mm7,BYTE_BIT + por mm1,mm7 ; mm1=Cr + movq MMWORD [edx], mm1 ; Save Cr + + sub ecx, byte SIZEOF_MMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr + add edi, byte SIZEOF_MMWORD ; outptr0 + add ebx, byte SIZEOF_MMWORD ; outptr1 + add edx, byte SIZEOF_MMWORD ; outptr2 + cmp ecx, byte SIZEOF_MMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near 
.rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JCCOLOR_RGBYCC_MMX_SUPPORTED +%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 diff --git a/jccolor.c b/jccolor.c index 0a8a4b5..85f3083 100644 --- a/jccolor.c +++ b/jccolor.c @@ -5,12 +5,20 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : January 5, 2006 + * --------------------------------------------------------------------- + * * This file contains input colorspace conversion routines. */ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jcolsamp.h" /* Private declarations */ /* Private subobject */ @@ -352,6 +360,7 @@ GLOBAL(void) jinit_color_converter (j_compress_ptr cinfo) { my_cconvert_ptr cconvert; + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -420,8 +429,23 @@ jinit_color_converter (j_compress_ptr cinfo) if (cinfo->num_components != 3) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); if (cinfo->in_color_space == JCS_RGB) { - cconvert->pub.start_pass = rgb_ycc_start; - cconvert->pub.color_convert = rgb_ycc_convert; +#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +#ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_rgb_ycc_convert_sse2)) { + cconvert->pub.color_convert = jpeg_rgb_ycc_convert_sse2; + } else +#endif +#ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED + if (simd & JSIMD_MMX) { + cconvert->pub.color_convert = 
jpeg_rgb_ycc_convert_mmx; + } else +#endif +#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */ + { + cconvert->pub.start_pass = rgb_ycc_start; + cconvert->pub.color_convert = rgb_ycc_convert; + } } else if (cinfo->in_color_space == JCS_YCbCr) cconvert->pub.color_convert = null_convert; else @@ -457,3 +481,28 @@ jinit_color_converter (j_compress_ptr cinfo) break; } } + + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED +/* Report which SIMD implementation of the RGB->YCbCr converter is usable: JSIMD_SSE2, JSIMD_MMX or JSIMD_NONE. Applies the same capability tests that jinit_color_converter uses when it installs color_convert for RGB input. */ +GLOBAL(unsigned int) +jpeg_simd_color_converter (j_compress_ptr cinfo) +{ + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); /* SIMD capability flags, tested with & below */ + +#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +#ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_rgb_ycc_convert_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif +#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */ + + return JSIMD_NONE; /* no SIMD variant applies */ +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ diff --git a/jccolss2.asm b/jccolss2.asm new file mode 100644 index 0000000..1aabd89 --- /dev/null +++ b/jccolss2.asm @@ -0,0 +1,541 @@ +; +; jccolss2.asm - colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jcolsamp.inc" + +%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +%ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_rgb_ycc_convert_sse2) + +EXTN(jconst_rgb_ycc_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331 +PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418 +PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jpeg_rgb_ycc_convert_sse2 (j_compress_ptr cinfo, +; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +; JDIMENSION output_row, int num_rows); +; Reads num_rows rows of interleaved pixels from input_buf and writes Y, Cb and Cr to output_buf[0], [1] and [2] starting at row output_row; the row width is cinfo->image_width. + +%define cinfo(b) (b)+8 ; j_compress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf +%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf +%define output_row(b) (b)+20 ; JDIMENSION output_row +%define num_rows(b) (b)+24 ; int num_rows + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_rgb_ycc_convert_sse2) + +EXTN(jpeg_rgb_ycc_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax ; keep it at the aligned frame base for 'pop esp' at .return + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] ; esp = address of wk(0), the lowest of the WK_NUM slots below ebp + pushpic eax ; make room for the GOT address (gotptr) + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, POINTER [cinfo(eax)] + mov ecx, JDIMENSION [jcstruct_image_width(ecx)] ; num_cols + test ecx,ecx + jz near .return ; zero-width image: nothing to convert + + push ecx ; park num_cols while ecx holds output_row + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] ; edi = &output_buf[0][output_row] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] ; ebx = &output_buf[1][output_row] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] ; edx = &output_buf[2][output_row] + + pop ecx ; ecx = num_cols again + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return ; num_rows <= 0: nothing to convert + alignx 16,7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp 
ecx, byte SIZEOF_XMMWORD + jae near .columnloop + alignx 16,7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax,edx +.column_ld4: + movd xmmA,eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd xmmF, _DWORD [esi+ecx] + pslldq xmmA, SIZEOF_DWORD + por xmmA,xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + movq xmmB, _MMWORD [esi+ecx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + mov ecx, SIZEOF_XMMWORD + jmp short .rgb_ycc_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmB,xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw 
xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE,xmmA + pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH,xmmH + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB,xmmE + punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF,xmmD + punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + movd xmmA, _DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + movq xmmE, _MMWORD [esi+ecx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA,xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + movdqa xmmE,xmmA + movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld8: + 
test cl, SIZEOF_XMMWORD/2 + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmF,xmmA + movdqa xmmH,xmmE + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16,7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC,xmmF + punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB,xmmA + punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG,xmmD + punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE,xmmA + punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH,xmmB + punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF,xmmF + + movdqa xmmC,xmmA + punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD,xmmB + punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + 
punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG,xmmE + punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF,xmmH + punpckhbw xmmH,xmmH + psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE + movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO + + movdqa xmm6,xmm1 + punpcklwd xmm1,xmm3 + punpckhwd xmm6,xmm3 + movdqa xmm7,xmm1 + movdqa xmm4,xmm6 + pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor xmm1,xmm1 + pxor xmm6,xmm6 + punpcklwd xmm1,xmm5 ; xmm1=BOL + punpckhwd xmm6,xmm5 ; xmm6=BOH + psrld xmm1,1 ; xmm1=BOL*FIX(0.500) + psrld xmm6,1 ; xmm6=BOH*FIX(0.500) + + movdqa xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm1 + paddd xmm4,xmm6 + paddd xmm7,xmm5 + paddd xmm4,xmm5 
+ psrld xmm7,SCALEBITS ; xmm7=CbOL + psrld xmm4,SCALEBITS ; xmm4=CbOH + packssdw xmm7,xmm4 ; xmm7=CbO + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE + + movdqa xmm6,xmm0 + punpcklwd xmm0,xmm2 + punpckhwd xmm6,xmm2 + movdqa xmm5,xmm0 + movdqa xmm4,xmm6 + pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor xmm0,xmm0 + pxor xmm6,xmm6 + punpcklwd xmm0,xmm1 ; xmm0=BEL + punpckhwd xmm6,xmm1 ; xmm6=BEH + psrld xmm0,1 ; xmm0=BEL*FIX(0.500) + psrld xmm6,1 ; xmm6=BEH*FIX(0.500) + + movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm5,xmm0 + paddd xmm4,xmm6 + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrld xmm5,SCALEBITS ; xmm5=CbEL + psrld xmm4,SCALEBITS ; xmm4=CbEH + packssdw xmm5,xmm4 ; xmm5=CbE + + psllw xmm7,BYTE_BIT + por xmm5,xmm7 ; xmm5=Cb + movdqa XMMWORD [ebx], xmm5 ; Save Cb + + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO + + movdqa xmm4,xmm0 + punpcklwd xmm0,xmm3 + punpckhwd xmm4,xmm3 + movdqa xmm7,xmm0 + movdqa xmm5,xmm4 + pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] + + paddd xmm0, XMMWORD [wk(4)] + paddd xmm4, XMMWORD [wk(5)] + paddd xmm0,xmm3 + paddd xmm4,xmm3 + psrld xmm0,SCALEBITS ; xmm0=YOL + psrld xmm4,SCALEBITS 
; xmm4=YOH + packssdw xmm0,xmm4 ; xmm0=YO + + pxor xmm3,xmm3 + pxor xmm4,xmm4 + punpcklwd xmm3,xmm1 ; xmm3=ROL + punpckhwd xmm4,xmm1 ; xmm4=ROH + psrld xmm3,1 ; xmm3=ROL*FIX(0.500) + psrld xmm4,1 ; xmm4=ROH*FIX(0.500) + + movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm7,xmm3 + paddd xmm5,xmm4 + paddd xmm7,xmm1 + paddd xmm5,xmm1 + psrld xmm7,SCALEBITS ; xmm7=CrOL + psrld xmm5,SCALEBITS ; xmm5=CrOH + packssdw xmm7,xmm5 ; xmm7=CrO + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE + + movdqa xmm4,xmm6 + punpcklwd xmm6,xmm2 + punpckhwd xmm4,xmm2 + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(6)] + paddd xmm4, XMMWORD [wk(7)] + paddd xmm6,xmm2 + paddd xmm4,xmm2 + psrld xmm6,SCALEBITS ; xmm6=YEL + psrld xmm4,SCALEBITS ; xmm4=YEH + packssdw xmm6,xmm4 ; xmm6=YE + + psllw xmm0,BYTE_BIT + por xmm6,xmm0 ; xmm6=Y + movdqa XMMWORD [edi], xmm6 ; Save Y + + pxor xmm2,xmm2 + pxor xmm4,xmm4 + punpcklwd xmm2,xmm3 ; xmm2=REL + punpckhwd xmm4,xmm3 ; xmm4=REH + psrld xmm2,1 ; xmm2=REL*FIX(0.500) + psrld xmm4,1 ; xmm4=REH*FIX(0.500) + + movdqa xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ] + + paddd xmm1,xmm2 + paddd xmm5,xmm4 + paddd xmm1,xmm0 + paddd xmm5,xmm0 + psrld xmm1,SCALEBITS ; xmm1=CrEL + psrld xmm5,SCALEBITS ; xmm5=CrEH + packssdw xmm1,xmm5 ; xmm1=CrE + + psllw xmm7,BYTE_BIT + por xmm1,xmm7 ; xmm1=Cr + movdqa XMMWORD [edx], xmm1 ; Save Cr + + sub ecx, byte SIZEOF_XMMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add edi, byte SIZEOF_XMMWORD ; outptr0 + add ebx, byte SIZEOF_XMMWORD ; outptr1 + add edx, byte SIZEOF_XMMWORD ; 
outptr2 + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx,ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JCCOLOR_RGBYCC_SSE2_SUPPORTED +%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 diff --git a/jcdctmgr.c b/jcdctmgr.c index 61fa79b..3a89eb4 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : December 24, 2005 + * --------------------------------------------------------------------- + * * This file contains the forward-DCT management logic. * This code selects a particular DCT implementation to be used, * and it performs related housekeeping chores including coefficient @@ -24,6 +31,8 @@ typedef struct { /* Pointer to the DCT routine actually in use */ forward_DCT_method_ptr do_dct; + convsamp_int_method_ptr convsamp; + quantize_int_method_ptr quantize; /* The actual post-DCT divisors --- not identical to the quant table * entries, because of scaling (especially for an unnormalized DCT). @@ -34,12 +43,75 @@ typedef struct { #ifdef DCT_FLOAT_SUPPORTED /* Same as above for the floating-point case. 
*/ float_DCT_method_ptr do_float_dct; + convsamp_float_method_ptr float_convsamp; + quantize_float_method_ptr float_quantize; FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; #endif } my_fdct_controller; typedef my_fdct_controller * my_fdct_ptr; +/* + * SIMD Ext: Most SSE/SSE2 instructions require that the memory address + * is aligned to a 16-byte boundary; if not, a general-protection exception + * (#GP) is generated. + */ + +#define ALIGN_SIZE 16 /* sizeof SSE/SSE2 register */ +#define ALIGN_MEM(p,a) ((void *) (((size_t) (p) + (a) - 1) & -(a))) + +#ifdef JFDCT_INT_QUANTIZE_WITH_DIVISION +#undef jpeg_quantize_int +#undef jpeg_quantize_int_mmx +#undef jpeg_quantize_int_sse2 +#define jpeg_quantize_int jpeg_quantize_idiv +#define jpeg_quantize_int_mmx jpeg_quantize_idiv +#define jpeg_quantize_int_sse2 jpeg_quantize_idiv +#endif + + +#ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION + +/* + * SIMD Ext: compute the reciprocal of the divisor + * Only the low 16 bits of divisor are used; the four results are stored DCTSIZE2 elements apart, starting at dtbl[0]. + * This implementation is based on an algorithm described in + * "How to optimize for the Pentium family of microprocessors" + * (http://www.agner.org/assem/). + */ + +LOCAL(void) +compute_reciprocal (DCTELEM divisor, DCTELEM * dtbl) +{ + unsigned long d = ((unsigned long) divisor) & 0x0000FFFF; /* divisor as an unsigned 16-bit value */ + unsigned long fq, fr; + int b, r, c; + + for (b = 0; (1UL << b) <= d; b++) ; /* b = number of significant bits in d */ + + r = 16 + (--b); /* r is 16 plus the index of the highest set bit of d */ + fq = (1UL << r) / d; /* NOTE(review): d == 0 would divide by zero here and on the next line; confirm callers never pass a divisor whose low 16 bits are all zero */ + fr = (1UL << r) % d; + r -= 16; + c = 0; + + if (fr == 0) { /* d divides 2^r exactly, i.e. d is a power of two */ + fq >>= 1; /* halve the reciprocal ... */ + r--; /* ... and use one shift bit less */ + } else if (fr <= (d / 2)) { + c++; /* remainder at most d/2: keep fq rounded down, add 1 to the correction */ + } else { + fq++; /* remainder above d/2: round the reciprocal up */ + } + + dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */ + dtbl[DCTSIZE2 * 1] = (DCTELEM) (c + (d / 2)); /* correction + roundfactor */ + dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (16 - (r + 1 + 1))); /* scale */ + dtbl[DCTSIZE2 * 3] = (DCTELEM) (r + 1); /* shift */ +} + +#endif /* !JFDCT_INT_QUANTIZE_WITH_DIVISION */ + /* * Initialize for a processing pass. 
@@ -75,6 +147,18 @@ start_pass_fdctmgr (j_compress_ptr cinfo) /* For LL&M IDCT method, divisors are equal to raw quantization * coefficients multiplied by 8 (to counteract scaling). */ +#ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION + if (fdct->divisors[qtblno] == NULL) { + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); + } + dtbl = fdct->divisors[qtblno]; + for (i = 0; i < DCTSIZE2; i++) { + compute_reciprocal ((DCTELEM) (qtbl->quantval[i] << 3), &dtbl[i]); + } + break; +#else /* JFDCT_INT_QUANTIZE_WITH_DIVISION */ if (fdct->divisors[qtblno] == NULL) { fdct->divisors[qtblno] = (DCTELEM *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -85,7 +169,8 @@ start_pass_fdctmgr (j_compress_ptr cinfo) dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3; } break; -#endif +#endif /* JFDCT_INT_QUANTIZE_WITH_DIVISION */ +#endif /* DCT_ISLOW_SUPPORTED */ #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: { @@ -109,6 +194,21 @@ start_pass_fdctmgr (j_compress_ptr cinfo) }; SHIFT_TEMPS +#ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION + if (fdct->divisors[qtblno] == NULL) { + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); + } + dtbl = fdct->divisors[qtblno]; + for (i = 0; i < DCTSIZE2; i++) { + compute_reciprocal ((DCTELEM) + DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], + (INT32) aanscales[i]), + CONST_BITS-3), + &dtbl[i]); + } +#else /* JFDCT_INT_QUANTIZE_WITH_DIVISION */ if (fdct->divisors[qtblno] == NULL) { fdct->divisors[qtblno] = (DCTELEM *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -121,9 +221,10 @@ start_pass_fdctmgr (j_compress_ptr cinfo) (INT32) aanscales[i]), CONST_BITS-3); } +#endif /* JFDCT_INT_QUANTIZE_WITH_DIVISION */ } break; -#endif +#endif /* DCT_IFAST_SUPPORTED */ #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: { @@ -183,83 +284,23 @@ forward_DCT (j_compress_ptr cinfo, 
jpeg_component_info * compptr, JDIMENSION num_blocks) /* This version is used for integer DCT implementations. */ { - /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - forward_DCT_method_ptr do_dct = fdct->do_dct; DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; - DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */ + DCTELEM workspace[DCTSIZE2 + ALIGN_SIZE/sizeof(DCTELEM)]; + DCTELEM * wkptr = (DCTELEM *) ALIGN_MEM(workspace, ALIGN_SIZE); JDIMENSION bi; sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ - { register DCTELEM *workspaceptr; - register JSAMPROW elemptr; - register int elemr; - - workspaceptr = workspace; - for (elemr = 0; elemr < DCTSIZE; elemr++) { - elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; -#else - { register int elemc; - for (elemc = DCTSIZE; elemc > 0; elemc--) { - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - } - } -#endif - } - } + (*fdct->convsamp) (sample_data, start_col, wkptr); /* Perform the DCT */ - (*do_dct) (workspace); + (*fdct->do_dct) (wkptr); /* Quantize/descale the coefficients, and store into coef_blocks[] */ - { register DCTELEM temp, qval; - register int i; - register JCOEFPTR output_ptr = coef_blocks[bi]; - - for (i = 0; i < DCTSIZE2; i++) { - qval = divisors[i]; - 
temp = workspace[i]; - /* Divide the coefficient value by qval, ensuring proper rounding. - * Since C does not specify the direction of rounding for negative - * quotients, we have to force the dividend positive for portability. - * - * In most files, at least half of the output values will be zero - * (at default quantization settings, more like three-quarters...) - * so we should ensure that this case is fast. On many machines, - * a comparison is enough cheaper than a divide to make a special test - * a win. Since both inputs will be nonnegative, we need only test - * for a < b to discover whether a/b is 0. - * If your machine's division is fast enough, define FAST_DIVIDE. - */ -#ifdef FAST_DIVIDE -#define DIVIDE_BY(a,b) a /= b -#else -#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0 -#endif - if (temp < 0) { - temp = -temp; - temp += qval>>1; /* for rounding */ - DIVIDE_BY(temp, qval); - temp = -temp; - } else { - temp += qval>>1; /* for rounding */ - DIVIDE_BY(temp, qval); - } - output_ptr[i] = (JCOEF) temp; - } - } + (*fdct->quantize) (coef_blocks[bi], divisors, wkptr); } } @@ -273,64 +314,23 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, JDIMENSION num_blocks) /* This version is used for floating-point DCT implementations. */ { - /* This routine is heavily used, so it's worth coding it tightly. 
*/ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - float_DCT_method_ptr do_dct = fdct->do_float_dct; FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; - FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */ + FAST_FLOAT workspace[DCTSIZE2 + ALIGN_SIZE/sizeof(FAST_FLOAT)]; + FAST_FLOAT * wkptr = (FAST_FLOAT *) ALIGN_MEM(workspace, ALIGN_SIZE); JDIMENSION bi; sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ - { register FAST_FLOAT *workspaceptr; - register JSAMPROW elemptr; - register int elemr; - - workspaceptr = workspace; - for (elemr = 0; elemr < DCTSIZE; elemr++) { - elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); -#else - { register int elemc; - for (elemc = DCTSIZE; elemc > 0; elemc--) { - *workspaceptr++ = (FAST_FLOAT) - (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - } - } -#endif - } - } + (*fdct->float_convsamp) (sample_data, start_col, wkptr); /* Perform the DCT */ - (*do_dct) (workspace); + (*fdct->do_float_dct) (wkptr); /* Quantize/descale the coefficients, and store into coef_blocks[] */ - { register FAST_FLOAT temp; - register int i; - register JCOEFPTR output_ptr = coef_blocks[bi]; - - for (i = 0; i < DCTSIZE2; i++) { - /* Apply the quantization and scaling 
factor */ - temp = workspace[i] * divisors[i]; - /* Round to nearest integer. - * Since C does not specify the direction of rounding for negative - * quotients, we have to force the dividend positive for portability. - * The maximum coefficient size is +-16K (for 12-bit data), so this - * code should work for either 16-bit or 32-bit ints. - */ - output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); - } - } + (*fdct->float_quantize) (coef_blocks[bi], divisors, wkptr); } } @@ -346,6 +346,7 @@ jinit_forward_dct (j_compress_ptr cinfo) { my_fdct_ptr fdct; int i; + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); fdct = (my_fdct_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -357,21 +358,86 @@ jinit_forward_dct (j_compress_ptr cinfo) #ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: fdct->pub.forward_DCT = forward_DCT; - fdct->do_dct = jpeg_fdct_islow; - break; +#ifdef JFDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fdct_islow_sse2)) { + fdct->do_dct = jpeg_fdct_islow_sse2; + fdct->convsamp = jpeg_convsamp_int_sse2; + fdct->quantize = jpeg_quantize_int_sse2; + } else +#endif +#ifdef JFDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) { + fdct->do_dct = jpeg_fdct_islow_mmx; + fdct->convsamp = jpeg_convsamp_int_mmx; + fdct->quantize = jpeg_quantize_int_mmx; + } else #endif + { + fdct->do_dct = jpeg_fdct_islow; + fdct->convsamp = jpeg_convsamp_int; + fdct->quantize = jpeg_quantize_int; + } + break; +#endif /* DCT_ISLOW_SUPPORTED */ #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: fdct->pub.forward_DCT = forward_DCT; - fdct->do_dct = jpeg_fdct_ifast; - break; +#ifdef JFDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fdct_ifast_sse2)) { + fdct->do_dct = jpeg_fdct_ifast_sse2; + fdct->convsamp = jpeg_convsamp_int_sse2; + fdct->quantize = jpeg_quantize_int_sse2; + } else #endif +#ifdef JFDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) { + fdct->do_dct = jpeg_fdct_ifast_mmx; + 
fdct->convsamp = jpeg_convsamp_int_mmx; + fdct->quantize = jpeg_quantize_int_mmx; + } else +#endif + { + fdct->do_dct = jpeg_fdct_ifast; + fdct->convsamp = jpeg_convsamp_int; + fdct->quantize = jpeg_quantize_int; + } + break; +#endif /* DCT_IFAST_SUPPORTED */ #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: fdct->pub.forward_DCT = forward_DCT_float; - fdct->do_float_dct = jpeg_fdct_float; - break; +#ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED + if (simd & JSIMD_SSE && simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fdct_float_sse)) { + fdct->do_float_dct = jpeg_fdct_float_sse; + fdct->float_convsamp = jpeg_convsamp_flt_sse2; + fdct->float_quantize = jpeg_quantize_flt_sse2; + } else +#endif +#ifdef JFDCT_FLT_SSE_MMX_SUPPORTED + if (simd & JSIMD_SSE && + IS_CONST_ALIGNED_16(jconst_fdct_float_sse)) { + fdct->do_float_dct = jpeg_fdct_float_sse; + fdct->float_convsamp = jpeg_convsamp_flt_sse; + fdct->float_quantize = jpeg_quantize_flt_sse; + } else +#endif +#ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED + if (simd & JSIMD_3DNOW) { + fdct->do_float_dct = jpeg_fdct_float_3dnow; + fdct->float_convsamp = jpeg_convsamp_flt_3dnow; + fdct->float_quantize = jpeg_quantize_flt_3dnow; + } else #endif + { + fdct->do_float_dct = jpeg_fdct_float; + fdct->float_convsamp = jpeg_convsamp_float; + fdct->float_quantize = jpeg_quantize_float; + } + break; +#endif /* DCT_FLOAT_SUPPORTED */ default: ERREXIT(cinfo, JERR_NOT_COMPILED); break; @@ -385,3 +451,65 @@ jinit_forward_dct (j_compress_ptr cinfo) #endif } } + + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +GLOBAL(unsigned int) +jpeg_simd_forward_dct (j_compress_ptr cinfo, int method) +{ + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); + + switch (method) { +#ifdef DCT_ISLOW_SUPPORTED + case JDCT_ISLOW: +#ifdef JFDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fdct_islow_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JFDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif + return JSIMD_NONE; +#endif /* 
DCT_ISLOW_SUPPORTED */ +#ifdef DCT_IFAST_SUPPORTED + case JDCT_IFAST: +#ifdef JFDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fdct_ifast_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JFDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif + return JSIMD_NONE; +#endif /* DCT_IFAST_SUPPORTED */ +#ifdef DCT_FLOAT_SUPPORTED + case JDCT_FLOAT: +#ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED + if (simd & JSIMD_SSE && simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fdct_float_sse)) + return JSIMD_SSE; /* (JSIMD_SSE | JSIMD_SSE2); */ +#endif +#ifdef JFDCT_FLT_SSE_MMX_SUPPORTED + if (simd & JSIMD_SSE && + IS_CONST_ALIGNED_16(jconst_fdct_float_sse)) + return JSIMD_SSE; /* (JSIMD_SSE | JSIMD_MMX); */ +#endif +#ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED + if (simd & JSIMD_3DNOW) + return JSIMD_3DNOW; /* (JSIMD_3DNOW | JSIMD_MMX); */ +#endif + return JSIMD_NONE; +#endif /* DCT_FLOAT_SUPPORTED */ + default: + ; + } + + return JSIMD_NONE; /* not compiled */ +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ diff --git a/jcolsamp.h b/jcolsamp.h new file mode 100644 index 0000000..2a27b53 --- /dev/null +++ b/jcolsamp.h @@ -0,0 +1,143 @@ +/* + * jcolsamp.h - private declarations for color conversion & up/downsampling + * + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * Last Modified : February 4, 2006 + * + * [TAB8] + */ + + +/* configuration check: BITS_IN_JSAMPLE==8 (8-bit sample values) is the only + * valid setting on this SIMD extension. + */ +#if BITS_IN_JSAMPLE != 8 +#error "Sorry, this SIMD code only copes with 8-bit sample values." +#endif + +/* Short forms of external names for systems with brain-damaged linkers. 
*/ + +#ifdef NEED_SHORT_EXTERNAL_NAMES +#define jpeg_rgb_ycc_convert_mmx jMRgbYccCnv /* jccolmmx.asm */ +#define jpeg_rgb_ycc_convert_sse2 jSRgbYccCnv /* jccolss2.asm */ +#define jpeg_h2v1_downsample_mmx jM21Downsample /* jcsammmx.asm */ +#define jpeg_h2v2_downsample_mmx jM22Downsample /* jcsammmx.asm */ +#define jpeg_h2v1_downsample_sse2 jS21Downsample /* jcsamss2.asm */ +#define jpeg_h2v2_downsample_sse2 jS22Downsample /* jcsamss2.asm */ +#define jpeg_ycc_rgb_convert_mmx jMYccRgbCnv /* jdcolmmx.asm */ +#define jpeg_ycc_rgb_convert_sse2 jSYccRgbCnv /* jdcolss2.asm */ +#define jpeg_h2v1_merged_upsample_mmx jM21MerUpsample /* jdmermmx.asm */ +#define jpeg_h2v2_merged_upsample_mmx jM22MerUpsample /* jdmermmx.asm */ +#define jpeg_h2v1_merged_upsample_sse2 jS21MerUpsample /* jdmerss2.asm */ +#define jpeg_h2v2_merged_upsample_sse2 jS22MerUpsample /* jdmerss2.asm */ +#define jpeg_h2v1_fancy_upsample_mmx jM21FanUpsample /* jdsammmx.asm */ +#define jpeg_h2v2_fancy_upsample_mmx jM22FanUpsample /* jdsammmx.asm */ +#define jpeg_h1v2_fancy_upsample_mmx jM12FanUpsample /* jdsammmx.asm */ +#define jpeg_h2v1_upsample_mmx jM21Upsample /* jdsammmx.asm */ +#define jpeg_h2v2_upsample_mmx jM22Upsample /* jdsammmx.asm */ +#define jpeg_h2v1_fancy_upsample_sse2 jS21FanUpsample /* jdsamss2.asm */ +#define jpeg_h2v2_fancy_upsample_sse2 jS22FanUpsample /* jdsamss2.asm */ +#define jpeg_h1v2_fancy_upsample_sse2 jS12FanUpsample /* jdsamss2.asm */ +#define jpeg_h2v1_upsample_sse2 jS21Upsample /* jdsamss2.asm */ +#define jpeg_h2v2_upsample_sse2 jS22Upsample /* jdsamss2.asm */ +#define jconst_rgb_ycc_convert_mmx jMCRgbYccCnv /* jccolmmx.asm */ +#define jconst_rgb_ycc_convert_sse2 jSCRgbYccCnv /* jccolss2.asm */ +#define jconst_ycc_rgb_convert_mmx jMCYccRgbCnv /* jdcolmmx.asm */ +#define jconst_ycc_rgb_convert_sse2 jSCYccRgbCnv /* jdcolss2.asm */ +#define jconst_merged_upsample_mmx jMCMerUpsample /* jdmermmx.asm */ +#define jconst_merged_upsample_sse2 jSCMerUpsample /* jdmerss2.asm */ +#define 
jconst_fancy_upsample_mmx jMCFanUpsample /* jdsammmx.asm */ +#define jconst_fancy_upsample_sse2 jSCFanUpsample /* jdsamss2.asm */ +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED +#define jpeg_simd_merged_upsampler jSiMUpsampler /* jdmerge.c */ +#endif +#endif /* NEED_SHORT_EXTERNAL_NAMES */ + +/* Extern declarations for color conversion & up/downsampling routines. */ + +EXTERN(void) jpeg_rgb_ycc_convert_mmx + JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); +EXTERN(void) jpeg_rgb_ycc_convert_sse2 + JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); + +EXTERN(void) jpeg_h2v1_downsample_mmx + JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data)); +EXTERN(void) jpeg_h2v2_downsample_mmx + JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data)); +EXTERN(void) jpeg_h2v1_downsample_sse2 + JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data)); +EXTERN(void) jpeg_h2v2_downsample_sse2 + JPP((j_compress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data)); + +EXTERN(void) jpeg_ycc_rgb_convert_mmx + JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows)); +EXTERN(void) jpeg_ycc_rgb_convert_sse2 + JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows)); + +EXTERN(void) jpeg_h2v1_merged_upsample_mmx + JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); +EXTERN(void) jpeg_h2v2_merged_upsample_mmx + JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); +EXTERN(void) jpeg_h2v1_merged_upsample_sse2 + JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + 
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); +EXTERN(void) jpeg_h2v2_merged_upsample_sse2 + JPP((j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); + +EXTERN(void) jpeg_h2v1_fancy_upsample_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h2v2_fancy_upsample_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h1v2_fancy_upsample_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h2v1_upsample_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h2v2_upsample_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h2v1_fancy_upsample_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h2v2_fancy_upsample_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h1v2_fancy_upsample_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h2v1_upsample_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); +EXTERN(void) jpeg_h2v2_upsample_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + +extern const int jconst_rgb_ycc_convert_mmx[]; +extern const int jconst_rgb_ycc_convert_sse2[]; +extern const int jconst_ycc_rgb_convert_mmx[]; +extern const int 
jconst_ycc_rgb_convert_sse2[]; +extern const int jconst_merged_upsample_mmx[]; +extern const int jconst_merged_upsample_sse2[]; +extern const int jconst_fancy_upsample_mmx[]; +extern const int jconst_fancy_upsample_sse2[]; + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED +EXTERN(unsigned int) jpeg_simd_merged_upsampler JPP((j_decompress_ptr cinfo)); +#endif diff --git a/jcolsamp.inc b/jcolsamp.inc new file mode 100644 index 0000000..03f5dbd --- /dev/null +++ b/jcolsamp.inc @@ -0,0 +1,156 @@ +; +; jcolsamp.inc - private declarations for color conversion & up/downsampling +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; Last Modified : January 5, 2006 +; +; [TAB8] + +; -------------------------------------------------------------------------- +; +; configuration check: BITS_IN_JSAMPLE==8 (8-bit sample values) is the only +; valid setting on this SIMD extension. +; +%if BITS_IN_JSAMPLE != 8 +%error "Sorry, this SIMD code only copes with 8-bit sample values." +%endif + +; Short forms of external names for systems with brain-damaged linkers. 
+; +%ifdef NEED_SHORT_EXTERNAL_NAMES +%define jpeg_rgb_ycc_convert_mmx jMRgbYccCnv ; jccolmmx.asm +%define jpeg_rgb_ycc_convert_sse2 jSRgbYccCnv ; jccolss2.asm +%define jpeg_h2v1_downsample_mmx jM21Downsample ; jcsammmx.asm +%define jpeg_h2v2_downsample_mmx jM22Downsample ; jcsammmx.asm +%define jpeg_h2v1_downsample_sse2 jS21Downsample ; jcsamss2.asm +%define jpeg_h2v2_downsample_sse2 jS22Downsample ; jcsamss2.asm +%define jpeg_ycc_rgb_convert_mmx jMYccRgbCnv ; jdcolmmx.asm +%define jpeg_ycc_rgb_convert_sse2 jSYccRgbCnv ; jdcolss2.asm +%define jpeg_h2v1_merged_upsample_mmx jM21MerUpsample ; jdmermmx.asm +%define jpeg_h2v2_merged_upsample_mmx jM22MerUpsample ; jdmermmx.asm +%define jpeg_h2v1_merged_upsample_sse2 jS21MerUpsample ; jdmerss2.asm +%define jpeg_h2v2_merged_upsample_sse2 jS22MerUpsample ; jdmerss2.asm +%define jpeg_h2v1_fancy_upsample_mmx jM21FanUpsample ; jdsammmx.asm +%define jpeg_h2v2_fancy_upsample_mmx jM22FanUpsample ; jdsammmx.asm +%define jpeg_h1v2_fancy_upsample_mmx jM12FanUpsample ; jdsammmx.asm +%define jpeg_h2v1_upsample_mmx jM21Upsample ; jdsammmx.asm +%define jpeg_h2v2_upsample_mmx jM22Upsample ; jdsammmx.asm +%define jpeg_h2v1_fancy_upsample_sse2 jS21FanUpsample ; jdsamss2.asm +%define jpeg_h2v2_fancy_upsample_sse2 jS22FanUpsample ; jdsamss2.asm +%define jpeg_h1v2_fancy_upsample_sse2 jS12FanUpsample ; jdsamss2.asm +%define jpeg_h2v1_upsample_sse2 jS21Upsample ; jdsamss2.asm +%define jpeg_h2v2_upsample_sse2 jS22Upsample ; jdsamss2.asm +%define jconst_rgb_ycc_convert_mmx jMCRgbYccCnv ; jccolmmx.asm +%define jconst_rgb_ycc_convert_sse2 jSCRgbYccCnv ; jccolss2.asm +%define jconst_ycc_rgb_convert_mmx jMCYccRgbCnv ; jdcolmmx.asm +%define jconst_ycc_rgb_convert_sse2 jSCYccRgbCnv ; jdcolss2.asm +%define jconst_merged_upsample_mmx jMCMerUpsample ; jdmermmx.asm +%define jconst_merged_upsample_sse2 jSCMerUpsample ; jdmerss2.asm +%define jconst_fancy_upsample_mmx jMCFanUpsample ; jdsammmx.asm +%define jconst_fancy_upsample_sse2 jSCFanUpsample ; 
jdsamss2.asm +%endif ; NEED_SHORT_EXTERNAL_NAMES + +; -------------------------------------------------------------------------- + +; pseudo-registers to make ordering of RGB configurable +; +%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +%if RGB_RED < 0 || RGB_RED >= RGB_PIXELSIZE || RGB_GREEN < 0 || \ + RGB_GREEN >= RGB_PIXELSIZE || RGB_BLUE < 0 || RGB_BLUE >= RGB_PIXELSIZE || \ + RGB_RED == RGB_GREEN || RGB_GREEN == RGB_BLUE || RGB_RED == RGB_BLUE +%error "Incorrect RGB pixel offset." +%endif + +%if RGB_RED == 0 +%define mmA mm0 +%define mmB mm1 +%define xmmA xmm0 +%define xmmB xmm1 +%elif RGB_GREEN == 0 +%define mmA mm2 +%define mmB mm3 +%define xmmA xmm2 +%define xmmB xmm3 +%elif RGB_BLUE == 0 +%define mmA mm4 +%define mmB mm5 +%define xmmA xmm4 +%define xmmB xmm5 +%else +%define mmA mm6 +%define mmB mm7 +%define xmmA xmm6 +%define xmmB xmm7 +%endif + +%if RGB_RED == 1 +%define mmC mm0 +%define mmD mm1 +%define xmmC xmm0 +%define xmmD xmm1 +%elif RGB_GREEN == 1 +%define mmC mm2 +%define mmD mm3 +%define xmmC xmm2 +%define xmmD xmm3 +%elif RGB_BLUE == 1 +%define mmC mm4 +%define mmD mm5 +%define xmmC xmm4 +%define xmmD xmm5 +%else +%define mmC mm6 +%define mmD mm7 +%define xmmC xmm6 +%define xmmD xmm7 +%endif + +%if RGB_RED == 2 +%define mmE mm0 +%define mmF mm1 +%define xmmE xmm0 +%define xmmF xmm1 +%elif RGB_GREEN == 2 +%define mmE mm2 +%define mmF mm3 +%define xmmE xmm2 +%define xmmF xmm3 +%elif RGB_BLUE == 2 +%define mmE mm4 +%define mmF mm5 +%define xmmE xmm4 +%define xmmF xmm5 +%else +%define mmE mm6 +%define mmF mm7 +%define xmmE xmm6 +%define xmmF xmm7 +%endif + +%if RGB_RED == 3 +%define mmG mm0 +%define mmH mm1 +%define xmmG xmm0 +%define xmmH xmm1 +%elif RGB_GREEN == 3 +%define mmG mm2 +%define mmH mm3 +%define xmmG xmm2 +%define xmmH xmm3 +%elif RGB_BLUE == 3 +%define mmG mm4 +%define mmH mm5 +%define xmmG xmm4 +%define xmmH xmm5 +%else +%define mmG mm6 +%define mmH mm7 +%define xmmG xmm6 +%define xmmH xmm7 +%endif +%endif ; RGB_PIXELSIZE == 3 ||
RGB_PIXELSIZE == 4 + +; -------------------------------------------------------------------------- diff --git a/jcomapi.c b/jcomapi.c index 9b1fa75..e4235c0 100644 --- a/jcomapi.c +++ b/jcomapi.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : March 11, 2005 + * --------------------------------------------------------------------- + * * This file contains application interface routines that are used for both * compression and decompression. */ @@ -104,3 +111,54 @@ jpeg_alloc_huff_table (j_common_ptr cinfo) tbl->sent_table = FALSE; /* make sure this is false in any new table */ return tbl; } + + +/* + * SIMD Ext: Checking for support of SIMD instruction set. 
+ */ + +GLOBAL(unsigned int) +jpeg_simd_support (j_common_ptr cinfo) +{ + enum { JSIMD_INVALID = ~0 }; + static volatile unsigned int simd_supported = JSIMD_INVALID; + + if (simd_supported == JSIMD_INVALID) + simd_supported = jpeg_simd_os_support(jpeg_simd_cpu_support()); + +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED + if (cinfo != NULL) /* Turn off the masked flags */ + return simd_supported & ~jpeg_simd_mask(cinfo, JSIMD_NONE, JSIMD_NONE); +#endif + return simd_supported; +} + +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED + +/* + * SIMD Ext: modify/retrieve SIMD instruction mask + */ + +GLOBAL(unsigned int) +jpeg_simd_mask (j_common_ptr cinfo, unsigned int remove, unsigned int add) +{ + unsigned long *gp; + unsigned int oldmask; + + if (cinfo->is_decompressor) + gp = (unsigned long *) &((j_decompress_ptr) cinfo)->output_gamma; + else /* compressor */ + gp = (unsigned long *) &((j_compress_ptr) cinfo)->input_gamma; + + if ((gp[1] == 0x3FF00000 || gp[1] == 0x00000000) && /* +1.0 or +0.0 */ + (gp[0] & ~JSIMD_ALL) == 0) { + oldmask = gp[0]; + if (((remove | add) & ~JSIMD_ALL) == 0) + gp[0] = (oldmask & ~remove) | add; + } else { + oldmask = 0; /* error */ + } + return oldmask; +} + +#endif /* !JSIMD_MASKFUNC_NOT_SUPPORTED */ diff --git a/jconfig.bc5 b/jconfig.bc5 new file mode 100644 index 0000000..50c309d --- /dev/null +++ b/jconfig.bc5 @@ -0,0 +1,48 @@ +/* jconfig.bc5 --- jconfig.h for Borland C++ Compiler 5.5 (win32) */ +/* see jconfig.doc for explanations */ + +#define HAVE_PROTOTYPES +#define HAVE_UNSIGNED_CHAR +#define HAVE_UNSIGNED_SHORT +/* #define void char */ +/* #define const */ +#undef CHAR_IS_UNSIGNED +#define HAVE_STDDEF_H +#define HAVE_STDLIB_H +#undef NEED_BSD_STRINGS +#undef NEED_SYS_TYPES_H +#undef NEED_FAR_POINTERS /* we presume a 32-bit flat memory model */ +#undef NEED_SHORT_EXTERNAL_NAMES +#undef INCOMPLETE_TYPES_BROKEN /* this assumes you have -w-stu in CFLAGS */ + +/* Define "boolean" as unsigned char, not int, per Windows custom */ +#define 
TYPEDEF_UCHAR_BOOLEAN + +#ifdef JPEG_INTERNALS + +#undef RIGHT_SHIFT_IS_UNSIGNED + +#endif /* JPEG_INTERNALS */ + +#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) +#undef JSIMD_MMX_NOT_SUPPORTED +#undef JSIMD_3DNOW_NOT_SUPPORTED +#undef JSIMD_SSE_NOT_SUPPORTED +#undef JSIMD_SSE2_NOT_SUPPORTED +#endif + +#ifdef JPEG_CJPEG_DJPEG + +#define BMP_SUPPORTED /* BMP image file format */ +#define GIF_SUPPORTED /* GIF image file format */ +#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */ +#undef RLE_SUPPORTED /* Utah RLE image file format */ +#define TARGA_SUPPORTED /* Targa image file format */ + +#define TWO_FILE_COMMANDLINE +#define USE_SETMODE /* Borland has setmode() */ +#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */ +#undef DONT_USE_B_MODE +#undef PROGRESS_REPORT /* optional */ + +#endif /* JPEG_CJPEG_DJPEG */ diff --git a/jconfig.cfg b/jconfig.cfg index 36a04fa..147cb6b 100644 --- a/jconfig.cfg +++ b/jconfig.cfg @@ -16,6 +16,9 @@ /* Define this if you get warnings about undefined structures. 
*/ #undef INCOMPLETE_TYPES_BROKEN +/* Define "boolean" as unsigned char, not int, per Windows custom */ +#undef TYPEDEF_UCHAR_BOOLEAN + #ifdef JPEG_INTERNALS #undef RIGHT_SHIFT_IS_UNSIGNED @@ -26,6 +29,13 @@ #endif /* JPEG_INTERNALS */ +#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) +#undef JSIMD_MMX_NOT_SUPPORTED +#undef JSIMD_3DNOW_NOT_SUPPORTED +#undef JSIMD_SSE_NOT_SUPPORTED +#undef JSIMD_SSE2_NOT_SUPPORTED +#endif + #ifdef JPEG_CJPEG_DJPEG #define BMP_SUPPORTED /* BMP image file format */ @@ -35,6 +45,8 @@ #define TARGA_SUPPORTED /* Targa image file format */ #undef TWO_FILE_COMMANDLINE +#undef USE_SETMODE +#undef USE_FDOPEN #undef NEED_SIGNAL_CATCHER #undef DONT_USE_B_MODE diff --git a/jconfig.dj b/jconfig.dj index f759a9d..b5a2e47 100644 --- a/jconfig.dj +++ b/jconfig.dj @@ -21,6 +21,13 @@ #endif /* JPEG_INTERNALS */ +#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) +#undef JSIMD_MMX_NOT_SUPPORTED +#undef JSIMD_3DNOW_NOT_SUPPORTED +#undef JSIMD_SSE_NOT_SUPPORTED +#undef JSIMD_SSE2_NOT_SUPPORTED +#endif + #ifdef JPEG_CJPEG_DJPEG #define BMP_SUPPORTED /* BMP image file format */ @@ -35,4 +42,6 @@ #undef DONT_USE_B_MODE #undef PROGRESS_REPORT /* optional */ +#define FREE_MEM_ESTIMATE 0 /* for alternate cjpeg/djpeg */ + #endif /* JPEG_CJPEG_DJPEG */ diff --git a/jconfig.linux b/jconfig.linux new file mode 100644 index 0000000..6c38ed5 --- /dev/null +++ b/jconfig.linux @@ -0,0 +1,44 @@ +/* jconfig.linux --- jconfig.h for Linux ELF with gcc */ +/* see jconfig.doc for explanations */ + +#define HAVE_PROTOTYPES +#define HAVE_UNSIGNED_CHAR +#define HAVE_UNSIGNED_SHORT +/* #define void char */ +/* #define const */ +#undef CHAR_IS_UNSIGNED +#define HAVE_STDDEF_H +#define HAVE_STDLIB_H +#undef NEED_BSD_STRINGS +#undef NEED_SYS_TYPES_H +#undef NEED_FAR_POINTERS +#undef NEED_SHORT_EXTERNAL_NAMES +#undef INCOMPLETE_TYPES_BROKEN + +#ifdef JPEG_INTERNALS + +#undef RIGHT_SHIFT_IS_UNSIGNED + +#endif /* JPEG_INTERNALS */ + +#if 
defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) +#undef JSIMD_MMX_NOT_SUPPORTED +#undef JSIMD_3DNOW_NOT_SUPPORTED +#undef JSIMD_SSE_NOT_SUPPORTED +#undef JSIMD_SSE2_NOT_SUPPORTED +#endif + +#ifdef JPEG_CJPEG_DJPEG + +#define BMP_SUPPORTED /* BMP image file format */ +#define GIF_SUPPORTED /* GIF image file format */ +#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */ +#undef RLE_SUPPORTED /* Utah RLE image file format */ +#define TARGA_SUPPORTED /* Targa image file format */ + +#undef TWO_FILE_COMMANDLINE +#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */ +#undef DONT_USE_B_MODE +#undef PROGRESS_REPORT /* optional */ + +#endif /* JPEG_CJPEG_DJPEG */ diff --git a/jconfig.mgw b/jconfig.mgw new file mode 100644 index 0000000..83dfe1d --- /dev/null +++ b/jconfig.mgw @@ -0,0 +1,48 @@ +/* jconfig.mgw --- jconfig.h for MinGW */ +/* see jconfig.doc for explanations */ + +#define HAVE_PROTOTYPES +#define HAVE_UNSIGNED_CHAR +#define HAVE_UNSIGNED_SHORT +/* #define void char */ +/* #define const */ +#undef CHAR_IS_UNSIGNED +#define HAVE_STDDEF_H +#define HAVE_STDLIB_H +#undef NEED_BSD_STRINGS +#undef NEED_SYS_TYPES_H +#undef NEED_FAR_POINTERS +#undef NEED_SHORT_EXTERNAL_NAMES +#undef INCOMPLETE_TYPES_BROKEN + +/* Define "boolean" as unsigned char, not int, per Windows custom */ +#define TYPEDEF_UCHAR_BOOLEAN + +#ifdef JPEG_INTERNALS + +#undef RIGHT_SHIFT_IS_UNSIGNED + +#endif /* JPEG_INTERNALS */ + +#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) +#undef JSIMD_MMX_NOT_SUPPORTED +#undef JSIMD_3DNOW_NOT_SUPPORTED +#undef JSIMD_SSE_NOT_SUPPORTED +#undef JSIMD_SSE2_NOT_SUPPORTED +#endif + +#ifdef JPEG_CJPEG_DJPEG + +#define BMP_SUPPORTED /* BMP image file format */ +#define GIF_SUPPORTED /* GIF image file format */ +#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */ +#undef RLE_SUPPORTED /* Utah RLE image file format */ +#define TARGA_SUPPORTED /* Targa image file format */ + +#define TWO_FILE_COMMANDLINE /* optional 
*/ +#define USE_SETMODE /* MinGW has setmode() */ +#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */ +#undef DONT_USE_B_MODE +#undef PROGRESS_REPORT /* optional */ + +#endif /* JPEG_CJPEG_DJPEG */ diff --git a/jconfig.vc b/jconfig.vc index 7e291c7..d5bc9f9 100644 --- a/jconfig.vc +++ b/jconfig.vc @@ -16,11 +16,7 @@ #undef INCOMPLETE_TYPES_BROKEN /* Define "boolean" as unsigned char, not int, per Windows custom */ -#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */ -typedef unsigned char boolean; -#endif -#define HAVE_BOOLEAN /* prevent jmorecfg.h from redefining it */ - +#define TYPEDEF_UCHAR_BOOLEAN #ifdef JPEG_INTERNALS @@ -28,6 +24,13 @@ typedef unsigned char boolean; #endif /* JPEG_INTERNALS */ +#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) +#undef JSIMD_MMX_NOT_SUPPORTED +#undef JSIMD_3DNOW_NOT_SUPPORTED +#undef JSIMD_SSE_NOT_SUPPORTED +#undef JSIMD_SSE2_NOT_SUPPORTED +#endif + #ifdef JPEG_CJPEG_DJPEG #define BMP_SUPPORTED /* BMP image file format */ diff --git a/jcqnt3dn.asm b/jcqnt3dn.asm new file mode 100644 index 0000000..8197858 --- /dev/null +++ b/jcqnt3dn.asm @@ -0,0 +1,240 @@ +; +; jcqnt3dn.asm - sample data conversion and quantization (3DNow! & MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : January 23, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_FLOAT_SUPPORTED +%ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. 
+; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jpeg_convsamp_flt_3dnow (JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT * workspace); +; + +%define sample_data ebp+8 ; JSAMPARRAY sample_data +%define start_col ebp+12 ; JDIMENSION start_col +%define workspace ebp+16 ; FAST_FLOAT * workspace + + align 16 + global EXTN(jpeg_convsamp_flt_3dnow) + +EXTN(jpeg_convsamp_flt_3dnow): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw mm7,mm7 + psllw mm7,7 + packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16,7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb mm0,mm7 ; mm0=(01234567) + psubb mm1,mm7 ; mm1=(89ABCDEF) + + punpcklbw mm2,mm0 ; mm2=(*0*1*2*3) + punpckhbw mm0,mm0 ; mm0=(*4*5*6*7) + punpcklbw mm3,mm1 ; mm3=(*8*9*A*B) + punpckhbw mm1,mm1 ; mm1=(*C*D*E*F) + + punpcklwd mm4,mm2 ; mm4=(***0***1) + punpckhwd mm2,mm2 ; mm2=(***2***3) + punpcklwd mm5,mm0 ; mm5=(***4***5) + punpckhwd mm0,mm0 ; mm0=(***6***7) + + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01) + psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23) + pi2fd mm4,mm4 + pi2fd mm2,mm2 + psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45) + psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67) + pi2fd mm5,mm5 + pi2fd mm0,mm0 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD
[MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + + punpcklwd mm6,mm3 ; mm6=(***8***9) + punpckhwd mm3,mm3 ; mm3=(***A***B) + punpcklwd mm4,mm1 ; mm4=(***C***D) + punpckhwd mm1,mm1 ; mm1=(***E***F) + + psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89) + psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB) + pi2fd mm6,mm6 + pi2fd mm3,mm3 + psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD) + psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF) + pi2fd mm4,mm4 + pi2fd mm1,mm1 + + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .convloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jpeg_quantize_flt_3dnow (JCOEFPTR coef_block, FAST_FLOAT * divisors, +; FAST_FLOAT * workspace); +; + +%define coef_block ebp+8 ; JCOEFPTR coef_block +%define divisors ebp+12 ; FAST_FLOAT * divisors +%define workspace ebp+16 ; FAST_FLOAT * workspace + + align 16 + global EXTN(jpeg_quantize_flt_3dnow) + +EXTN(jpeg_quantize_flt_3dnow): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov eax, 0x4B400000 ; (float)0x00C00000 (rndint_magic) + movd mm7,eax + punpckldq mm7,mm7 ; mm7={12582912.0F 12582912.0F} + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16,7 +.quantloop: + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + 
pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm0,mm7 ; mm0=(00 ** 01 **) + pfadd mm1,mm7 ; mm1=(02 ** 03 **) + pfadd mm2,mm7 ; mm2=(04 ** 05 **) + pfadd mm3,mm7 ; mm3=(06 ** 07 **) + + movq mm4,mm0 + punpcklwd mm0,mm1 ; mm0=(00 02 ** **) + punpckhwd mm4,mm1 ; mm4=(01 03 ** **) + movq mm5,mm2 + punpcklwd mm2,mm3 ; mm2=(04 06 ** **) + punpckhwd mm5,mm3 ; mm5=(05 07 ** **) + + punpcklwd mm0,mm4 ; mm0=(00 01 02 03) + punpcklwd mm2,mm5 ; mm2=(04 05 06 07) + + movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)] + movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm6,mm7 ; mm6=(10 ** 11 **) + pfadd mm1,mm7 ; mm1=(12 ** 13 **) + pfadd mm3,mm7 ; mm3=(14 ** 15 **) + pfadd mm4,mm7 ; mm4=(16 ** 17 **) + + movq mm5,mm6 + punpcklwd mm6,mm1 ; mm6=(10 12 ** **) + punpckhwd mm5,mm1 ; mm5=(11 13 ** **) + movq mm1,mm3 + punpcklwd mm3,mm4 ; mm3=(14 16 ** **) + punpckhwd mm1,mm4 ; mm1=(15 17 ** **) + + punpcklwd mm6,mm5 ; mm6=(10 11 12 13) + punpcklwd mm3,mm1 ; mm3=(14 15 16 17) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz near .quantloop + + femms ; empty
MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +%endif ; JFDCT_FLT_3DNOW_MMX_SUPPORTED +%endif ; DCT_FLOAT_SUPPORTED diff --git a/jcqntflt.asm b/jcqntflt.asm new file mode 100644 index 0000000..4631a06 --- /dev/null +++ b/jcqntflt.asm @@ -0,0 +1,202 @@ +; +; jcqntflt.asm - sample data conversion and quantization (non-SIMD, FP) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : March 21, 2004 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_FLOAT_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." 
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                      FAST_FLOAT * workspace);
+;
+
+%define sample_data	ebp+8		; JSAMPARRAY sample_data
+%define start_col	ebp+12		; JDIMENSION start_col
+%define workspace	ebp+16		; FAST_FLOAT * workspace
+
+	align	16
+	global	EXTN(jpeg_convsamp_float)
+
+EXTN(jpeg_convsamp_float):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
+	mov	edi, POINTER [workspace]	; (FAST_FLOAT *)
+	mov	ecx, DCTSIZE			; row counter
+	alignx	16,7
+.convloop:
+	mov	ebx, JSAMPROW [esi]		; (JSAMPLE *)
+	add	ebx, JDIMENSION [start_col]	; ebx -> first sample of this row
+
+%assign i 0	; i=0
+%rep 4		; -- repeat 4 times ---
+	xor	eax,eax
+	xor	edx,edx
+	mov	al, JSAMPLE [ebx+(i+0)*SIZEOF_JSAMPLE]	; eax = sample i, zero-extended
+	mov	dl, JSAMPLE [ebx+(i+1)*SIZEOF_JSAMPLE]	; edx = sample i+1, zero-extended
+	add	eax, byte -CENTERJSAMPLE		; unsigned -> signed
+	add	edx, byte -CENTERJSAMPLE
+	push	eax					; stage on the stack for fild
+	push	edx
+%assign i i+2	; i+=2
+%endrep		; -- repeat end ---
+
+	fild	INT32 [esp+0*SIZEOF_INT32]	; last value pushed = sample 7
+	fild	INT32 [esp+1*SIZEOF_INT32]
+	fild	INT32 [esp+2*SIZEOF_INT32]
+	fild	INT32 [esp+3*SIZEOF_INT32]
+	fild	INT32 [esp+4*SIZEOF_INT32]
+	fild	INT32 [esp+5*SIZEOF_INT32]
+	fild	INT32 [esp+6*SIZEOF_INT32]
+	fild	INT32 [esp+7*SIZEOF_INT32]	; first value pushed = sample 0, now in st0
+
+	add	esp, byte DCTSIZE*SIZEOF_INT32	; discard the staged integers
+
+	fstp	FAST_FLOAT [edi+0*SIZEOF_FAST_FLOAT]	; pops come out in sample order 0..7
+	fstp	FAST_FLOAT [edi+1*SIZEOF_FAST_FLOAT]
+	fstp	FAST_FLOAT [edi+2*SIZEOF_FAST_FLOAT]
+	fstp	FAST_FLOAT [edi+3*SIZEOF_FAST_FLOAT]
+	fstp	FAST_FLOAT [edi+4*SIZEOF_FAST_FLOAT]
+	fstp	FAST_FLOAT [edi+5*SIZEOF_FAST_FLOAT]
+	fstp	FAST_FLOAT [edi+6*SIZEOF_FAST_FLOAT]
+	fstp	FAST_FLOAT [edi+7*SIZEOF_FAST_FLOAT]
+
+	add	esi, byte SIZEOF_JSAMPROW		; next row pointer
+	add	edi, byte DCTSIZE*SIZEOF_FAST_FLOAT	; next workspace row
+	dec	ecx
+	jnz	near .convloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jpeg_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                      FAST_FLOAT * workspace);
+;
+
+%define coef_block	ebp+8		; JCOEFPTR coef_block
+%define divisors	ebp+12		; FAST_FLOAT * divisors
+%define workspace	ebp+16		; FAST_FLOAT * workspace
+
+%define FLT_ROUNDS	1	; from <float.h>
+
+	align	16
+	global	EXTN(jpeg_quantize_float)
+
+EXTN(jpeg_quantize_float):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; unused
+;	push	edx		; unused
+	push	esi
+	push	edi
+
+%if (FLT_ROUNDS != 1)
+	push	eax			; reserve a stack slot
+	fnstcw	word [esp]		; save the current FPU control word in it
+	mov	eax, [esp]
+	and	eax, (~0x0C00)		; round to nearest integer
+	push	eax
+	fldcw	word [esp]		; activate the modified control word
+	pop	eax			; saved control word stays on the stack
+%endif
+	mov	esi, POINTER [workspace]	; esi -> workspace (input floats)
+	mov	ebx, POINTER [divisors]		; ebx -> divisors (multiplied in below)
+	mov	edi, JCOEFPTR [coef_block]	; edi -> coef_block (output)
+	mov	eax, DCTSIZE2/8			; loop count: 8 coefficients per pass
+	alignx	16,7
+.quantloop:
+	fld	FAST_FLOAT [esi+0*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+0*SIZEOF_FAST_FLOAT]
+	fld	FAST_FLOAT [esi+1*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+1*SIZEOF_FAST_FLOAT]
+	fld	FAST_FLOAT [esi+2*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+2*SIZEOF_FAST_FLOAT]
+	fld	FAST_FLOAT [esi+3*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+3*SIZEOF_FAST_FLOAT]
+
+	fld	FAST_FLOAT [esi+4*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+4*SIZEOF_FAST_FLOAT]
+	fxch	st0,st1			; the four fxch's leave products 0..7 in st0..st7
+	fld	FAST_FLOAT [esi+5*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+5*SIZEOF_FAST_FLOAT]
+	fxch	st0,st3
+	fld	FAST_FLOAT [esi+6*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+6*SIZEOF_FAST_FLOAT]
+	fxch	st0,st5
+	fld	FAST_FLOAT [esi+7*SIZEOF_FAST_FLOAT]
+	fmul	FAST_FLOAT [ebx+7*SIZEOF_FAST_FLOAT]
+	fxch	st0,st7
+
+	fistp	JCOEF [edi+0*SIZEOF_JCOEF]	; convert to integer and store, popping in order
+	fistp	JCOEF [edi+1*SIZEOF_JCOEF]
+	fistp	JCOEF [edi+2*SIZEOF_JCOEF]
+	fistp	JCOEF [edi+3*SIZEOF_JCOEF]
+	fistp	JCOEF [edi+4*SIZEOF_JCOEF]
+	fistp	JCOEF [edi+5*SIZEOF_JCOEF]
+	fistp	JCOEF [edi+6*SIZEOF_JCOEF]
+	fistp	JCOEF [edi+7*SIZEOF_JCOEF]
+
+	add	esi, byte 8*SIZEOF_FAST_FLOAT
+	add	ebx, byte 8*SIZEOF_FAST_FLOAT
+	add	edi, byte 8*SIZEOF_JCOEF
+	dec	eax
+	jnz	short .quantloop
+
+%if (FLT_ROUNDS != 1)
+	fldcw	word [esp]		; restore the control word saved on entry
+	pop	eax			; pop old control word
+%endif
+	pop	edi
+	pop	esi
+;	pop	edx		; unused
+;	pop	ecx		; unused
+	pop	ebx
+	pop	ebp
+	ret
+
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jcqntint.asm b/jcqntint.asm
new file mode 100644
index 0000000..e0de1cb
--- /dev/null
+++ b/jcqntint.asm
@@ -0,0 +1,243 @@
+;
+; jcqntint.asm - sample data conversion and quantization (non-SIMD, integer)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 27, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_int (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                    DCTELEM * workspace);
+;
+
+%define sample_data	ebp+8		; JSAMPARRAY sample_data
+%define start_col	ebp+12		; JDIMENSION start_col
+%define workspace	ebp+16		; DCTELEM * workspace
+
+	align	16
+	global	EXTN(jpeg_convsamp_int)
+
+EXTN(jpeg_convsamp_int):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
+	mov	edi, POINTER [workspace]	; (DCTELEM *)
+	mov	ecx, DCTSIZE			; row counter
+	alignx	16,7
+.convloop:
+	mov	ebx, JSAMPROW [esi]		; (JSAMPLE *)
+	add	ebx, JDIMENSION [start_col]	; ebx -> first sample of this row
+
+%assign i 0	; i=0
+%rep 4		; -- repeat 4 times ---
+	xor	eax,eax
+	xor	edx,edx
+	mov	al, JSAMPLE [ebx+(i+0)*SIZEOF_JSAMPLE]	; eax = sample i, zero-extended
+	mov	dl, JSAMPLE [ebx+(i+1)*SIZEOF_JSAMPLE]	; edx = sample i+1, zero-extended
+	add	eax, byte -CENTERJSAMPLE		; unsigned -> signed
+	add	edx, byte -CENTERJSAMPLE
+	mov	DCTELEM [edi+(i+0)*SIZEOF_DCTELEM], ax	; store the low 16 bits
+	mov	DCTELEM [edi+(i+1)*SIZEOF_DCTELEM], dx
+%assign i i+2	; i+=2
+%endrep		; -- repeat end ---
+
+	add	esi, byte SIZEOF_JSAMPROW		; next row pointer
+	add	edi, byte DCTSIZE*SIZEOF_DCTELEM	; next workspace row
+	dec	ecx
+	jnz	short .convloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+%ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jpeg_quantize_int (JCOEFPTR coef_block, DCTELEM * divisors,
+;                    DCTELEM * workspace);
+;
+
+%define RECIPROCAL(i,b)	((b)+((i)+DCTSIZE2*0)*SIZEOF_DCTELEM)	; sub-table 0 of divisors
+%define CORRECTION(i,b)	((b)+((i)+DCTSIZE2*1)*SIZEOF_DCTELEM)	; sub-table 1 of divisors
+%define SHIFT(i,b)	((b)+((i)+DCTSIZE2*3)*SIZEOF_DCTELEM)	; sub-table 3 of divisors
+
+%define coef_block	ebp+8		; JCOEFPTR coef_block
+%define divisors	ebp+12		; DCTELEM * divisors
+%define workspace	ebp+16		; DCTELEM * workspace
+
+%define UNROLL	2	; coefficients handled per loop pass
+
+	align	16
+	global	EXTN(jpeg_quantize_int)
+
+EXTN(jpeg_quantize_int):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, POINTER [workspace]	; esi -> workspace (input)
+	mov	ebx, POINTER [divisors]		; ebx -> divisors tables
+	mov	edi, JCOEFPTR [coef_block]	; edi -> coef_block (output)
+	mov	ecx, DCTSIZE2/UNROLL		; loop counter
+	alignx	16,7
+.quantloop:
+	push	ecx			; cx/cl are used as scratch below
+
+%assign i 0	; i=0;
+%rep UNROLL	; ---- repeat (UNROLL) times ----
+	mov	cx, DCTELEM [esi+(i)*SIZEOF_DCTELEM]
+	mov	ax,cx
+	sar	cx,(WORD_BIT-1)		; cx = sign mask (0 or -1)
+	xor	ax,cx			; if (ax < 0) ax = -ax;
+	sub	ax,cx
+	add	ax, DCTELEM [CORRECTION(i,ebx)]	; correction + roundfactor
+	shl	ax,1
+	mul	DCTELEM [RECIPROCAL(i,ebx)]	; reciprocal
+	mov	ax,cx			; ax = sign mask; the high word of the product stays in dx
+	mov	cx, DCTELEM [SHIFT(i,ebx)]	; shift
+	shr	dx,cl
+	xor	dx,ax			; re-apply the original sign
+	sub	dx,ax
+	mov	JCOEF [edi+(i)*SIZEOF_JCOEF], dx
+%assign i i+1	; i++;
+%endrep		; ---- repeat end ----
+
+	pop	ecx			; restore the loop counter
+
+	add	esi, byte UNROLL*SIZEOF_DCTELEM
+	add	ebx, byte UNROLL*SIZEOF_DCTELEM
+	add	edi, byte UNROLL*SIZEOF_JCOEF
+	dec	ecx
+	jnz	.quantloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+%else ; JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jpeg_quantize_idiv (JCOEFPTR coef_block, DCTELEM * divisors,
+;                     DCTELEM * workspace);
+;
+
+%define coef_block	ebp+8		; JCOEFPTR coef_block
+%define divisors	ebp+12		; DCTELEM * divisors
+%define workspace	ebp+16		; DCTELEM * workspace
+
+	align	16
+	global	EXTN(jpeg_quantize_idiv)
+
+EXTN(jpeg_quantize_idiv):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, POINTER [workspace]	; esi -> workspace (input)
+	mov	ebx, POINTER [divisors]		; ebx -> divisors (one qval per coefficient)
+	mov	edi, JCOEFPTR [coef_block]	; edi -> coef_block (output)
+	mov	ecx, DCTSIZE2			; loop counter: one coefficient per pass
+	alignx	16,7
+.quantloop:
+	push	ecx			; ecx is reused for the sign mask below
+
+	movsx	ecx, DCTELEM [esi]	; temp
+	mov	eax,ecx
+	sar	ecx,(DWORD_BIT-1)	; ecx = sign mask (0 or -1)
+	xor	edx,edx
+	mov	dx, DCTELEM [ebx]	; qval
+	xor	eax,ecx			; if (eax < 0) eax = -eax;
+	shr	edx,1			; edx = qval/2
+	sub	eax,ecx
+	cmp	eax,edx			; if (temp + qval/2 >= qval)  [compares |temp| with qval/2]
+	jge	short .quant
+		; ---- if the quantized coefficient is zero
+	xor	eax,eax
+	jmp	short .output
+	alignx	16,7
+.quant:		; ---- do quantization
+	add	eax,edx			; |temp| + qval/2
+	xor	edx,edx
+	div	DCTELEM [ebx]		; Q:ax,R:dx
+	xor	ax,cx			; re-apply the original sign
+	sub	ax,cx
+	alignx	16,7
+.output:
+	mov	JCOEF [edi], ax
+
+	pop	ecx			; restore the loop counter
+
+	add	esi, byte SIZEOF_DCTELEM
+	add	ebx, byte SIZEOF_DCTELEM
+	add	edi, byte SIZEOF_JCOEF
+	dec	ecx
+	jnz	short .quantloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+%endif ; !JFDCT_INT_QUANTIZE_WITH_DIVISION
diff --git a/jcqntmmx.asm b/jcqntmmx.asm
new file mode 100644
index 0000000..9cdf584
--- /dev/null
+++ b/jcqntmmx.asm
@@ -0,0 +1,254 @@
+;
+; jcqntmmx.asm - sample data conversion and quantization (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 27, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef JFDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_int_mmx (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                        DCTELEM * workspace);
+;
+
+%define sample_data	ebp+8		; JSAMPARRAY sample_data
+%define start_col	ebp+12		; JDIMENSION start_col
+%define workspace	ebp+16		; DCTELEM * workspace
+
+	align	16
+	global	EXTN(jpeg_convsamp_int_mmx)
+
+EXTN(jpeg_convsamp_int_mmx):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	pxor	mm6,mm6			; mm6=(all 0's)
+	pcmpeqw	mm7,mm7
+	psllw	mm7,7			; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+
+	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
+	mov	eax, JDIMENSION [start_col]
+	mov	edi, POINTER [workspace]	; (DCTELEM *)
+	mov	ecx, DCTSIZE/4			; loop count: 4 rows per pass
+	alignx	16,7
+.convloop:
+	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+
+	movq	mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; mm0=(01234567)
+	movq	mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]	; mm1=(89ABCDEF)
+
+	mov	ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+	mov	edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+
+	movq	mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; mm2=(GHIJKLMN)
+	movq	mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE]	; mm3=(OPQRSTUV)
+
+	movq	mm4,mm0
+	punpcklbw mm0,mm6		; mm0=(0123)
+	punpckhbw mm4,mm6		; mm4=(4567)
+	movq	mm5,mm1
+	punpcklbw mm1,mm6		; mm1=(89AB)
+	punpckhbw mm5,mm6		; mm5=(CDEF)
+
+	paddw	mm0,mm7			; add 0xFF80 (= -128) to each word: unsigned -> signed
+	paddw	mm4,mm7
+	paddw	mm1,mm7
+	paddw	mm5,mm7
+
+	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
+	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4
+	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1
+	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5
+
+	movq	mm0,mm2
+	punpcklbw mm2,mm6		; mm2=(GHIJ)
+	punpckhbw mm0,mm6		; mm0=(KLMN)
+	movq	mm4,mm3
+	punpcklbw mm3,mm6		; mm3=(OPQR)
+	punpckhbw mm4,mm6		; mm4=(STUV)
+
+	paddw	mm2,mm7
+	paddw	mm0,mm7
+	paddw	mm3,mm7
+	paddw	mm4,mm7
+
+	movq	MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2
+	movq	MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0
+	movq	MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3
+	movq	MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4
+
+	add	esi, byte 4*SIZEOF_JSAMPROW
+	add	edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
+	dec	ecx
+	jnz	short .convloop
+
+	emms		; empty MMX state
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+%ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jpeg_quantize_int_mmx (JCOEFPTR coef_block, DCTELEM * divisors,
+;                        DCTELEM * workspace);
+;
+
+%define RECIPROCAL(m,n,b) MMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)	; sub-table 0 of divisors
+%define CORRECTION(m,n,b) MMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)	; sub-table 1 of divisors
+%define SCALE(m,n,b)      MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)	; sub-table 2 of divisors
+
+%define coef_block	ebp+8		; JCOEFPTR coef_block
+%define divisors	ebp+12		; DCTELEM * divisors
+%define workspace	ebp+16		; DCTELEM * workspace
+
+	align	16
+	global	EXTN(jpeg_quantize_int_mmx)
+
+EXTN(jpeg_quantize_int_mmx):
+	push	ebp
+	mov	ebp,esp
+;	push	ebx		; unused
+;	push	ecx		; unused
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, POINTER [workspace]	; esi -> workspace (input)
+	mov	edx, POINTER [divisors]		; edx -> divisors tables
+	mov	edi, JCOEFPTR [coef_block]	; edi -> coef_block (output)
+	mov	ah, 2				; outer loop count
+	alignx	16,7
+.quantloop1:
+	mov	al, DCTSIZE2/8/2		; inner loop count: 8 coefficients per pass
+	alignx	16,7
+.quantloop2:
+	movq	mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+	movq	mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
+	movq	mm0,mm2
+	movq	mm1,mm3
+	psraw	mm2,(WORD_BIT-1)	; mm2/mm3 = per-word sign masks, kept until the end
+	psraw	mm3,(WORD_BIT-1)
+	pxor	mm0,mm2
+	pxor	mm1,mm3
+	psubw	mm0,mm2			; if (mm0 < 0) mm0 = -mm0;
+	psubw	mm1,mm3			; if (mm1 < 0) mm1 = -mm1;
+
+	; unsigned long unsigned_multiply(unsigned short x, unsigned short y)
+	; {
+	;	enum { SHORT_BIT = 16 };
+	;	signed short sx = (signed short) x;
+	;	signed short sy = (signed short) y;
+	;	signed long sz;
+	;
+	;	sz = (long) sx * (long) sy;	/* signed multiply */
+	;
+	;	if (sx < 0) sz += (long) sy << SHORT_BIT;
+	;	if (sy < 0) sz += (long) sx << SHORT_BIT;
+	;
+	;	return (unsigned long) sz;
+	; }
+
+	paddw	mm0, MMWORD [CORRECTION(0,0,edx)]	; correction + roundfactor
+	paddw	mm1, MMWORD [CORRECTION(0,1,edx)]
+	psllw	mm0,1
+	psllw	mm1,1
+	movq	mm4,mm0			; keep x for the unsigned-multiply fix-up
+	movq	mm5,mm1
+	pmulhw	mm0, MMWORD [RECIPROCAL(0,0,edx)]	; reciprocal
+	pmulhw	mm1, MMWORD [RECIPROCAL(0,1,edx)]
+	movq	mm6, MMWORD [SCALE(0,0,edx)]	; scale
+	movq	mm7, MMWORD [SCALE(0,1,edx)]
+	paddw	mm0,mm4		; reciprocal is always negative (MSB=1)
+	paddw	mm1,mm5
+	psllw	mm0,1
+	psllw	mm1,1
+	movq	mm4,mm0			; keep x for the second fix-up
+	movq	mm5,mm1
+	pmulhw	mm0,mm6			; signed high product x * scale
+	pmulhw	mm1,mm7
+	psraw	mm6,(WORD_BIT-1)	; sign mask of scale
+	psraw	mm7,(WORD_BIT-1)
+	pand	mm6,mm4			; x where scale < 0
+	pand	mm7,mm5
+	paddw	mm0,mm6			; cf. "if (sy < 0) sz += sx << SHORT_BIT"
+	paddw	mm1,mm7
+	psraw	mm4,(WORD_BIT-1)	; sign mask of x
+	psraw	mm5,(WORD_BIT-1)
+	pand	mm4, MMWORD [SCALE(0,0,edx)]	; scale
+	pand	mm5, MMWORD [SCALE(0,1,edx)]
+	paddw	mm0,mm4			; cf. "if (sx < 0) sz += sy << SHORT_BIT"
+	paddw	mm1,mm5
+
+	pxor	mm0,mm2			; re-apply the original signs
+	pxor	mm1,mm3
+	psubw	mm0,mm2
+	psubw	mm1,mm3
+	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0	; NOTE(review): JCOEF store sized with SIZEOF_DCTELEM; presumably equal to SIZEOF_JCOEF - confirm
+	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1
+
+	add	esi, byte 8*SIZEOF_DCTELEM
+	add	edx, byte 8*SIZEOF_DCTELEM
+	add	edi, byte 8*SIZEOF_JCOEF
+	dec	al
+	jnz	near .quantloop2
+	dec	ah
+	jnz	near .quantloop1	; to avoid branch misprediction
+
+	emms		; empty MMX state
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; unused
+;	pop	ebx		; unused
+	pop	ebp
+	ret
+
+%endif ; !JFDCT_INT_QUANTIZE_WITH_DIVISION
+%endif ; JFDCT_INT_MMX_SUPPORTED
diff --git a/jcqnts2f.asm b/jcqnts2f.asm
new file mode 100644
index 0000000..faf663e
--- /dev/null
+++ b/jcqnts2f.asm
@@ -0,0 +1,178 @@
+;
+; jcqnts2f.asm - sample data conversion and quantization (SSE & SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 18, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_flt_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                         FAST_FLOAT * workspace);
+;
+
+%define sample_data	ebp+8		; JSAMPARRAY sample_data
+%define start_col	ebp+12		; JDIMENSION start_col
+%define workspace	ebp+16		; FAST_FLOAT * workspace
+
+	align	16
+	global	EXTN(jpeg_convsamp_flt_sse2)
+
+EXTN(jpeg_convsamp_flt_sse2):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	pcmpeqw	xmm7,xmm7
+	psllw	xmm7,7
+	packsswb xmm7,xmm7		; xmm7 = PB_CENTERJSAMPLE (0x808080..)
+
+	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
+	mov	eax, JDIMENSION [start_col]
+	mov	edi, POINTER [workspace]	; (FAST_FLOAT *)
+	mov	ecx, DCTSIZE/2			; loop count: 2 rows per pass
+	alignx	16,7
+.convloop:
+	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+
+	movq	xmm0, _MMWORD [ebx+eax*SIZEOF_JSAMPLE]
+	movq	xmm1, _MMWORD [edx+eax*SIZEOF_JSAMPLE]
+
+	psubb	xmm0,xmm7			; xmm0=(01234567)
+	psubb	xmm1,xmm7			; xmm1=(89ABCDEF)
+
+	punpcklbw xmm0,xmm0			; xmm0=(*0*1*2*3*4*5*6*7)
+	punpcklbw xmm1,xmm1			; xmm1=(*8*9*A*B*C*D*E*F)
+	; the '*' lanes are don't-care; psrad below shifts them out and sign-extends
+	punpcklwd xmm2,xmm0			; xmm2=(***0***1***2***3)
+	punpckhwd xmm0,xmm0			; xmm0=(***4***5***6***7)
+	punpcklwd xmm3,xmm1			; xmm3=(***8***9***A***B)
+	punpckhwd xmm1,xmm1			; xmm1=(***C***D***E***F)
+
+	psrad	xmm2,(DWORD_BIT-BYTE_BIT)	; xmm2=(0123)
+	psrad	xmm0,(DWORD_BIT-BYTE_BIT)	; xmm0=(4567)
+	cvtdq2ps xmm2,xmm2			; xmm2=(0123)
+	cvtdq2ps xmm0,xmm0			; xmm0=(4567)
+	psrad	xmm3,(DWORD_BIT-BYTE_BIT)	; xmm3=(89AB)
+	psrad	xmm1,(DWORD_BIT-BYTE_BIT)	; xmm1=(CDEF)
+	cvtdq2ps xmm3,xmm3			; xmm3=(89AB)
+	cvtdq2ps xmm1,xmm1			; xmm1=(CDEF)
+
+	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+
+	add	esi, byte 2*SIZEOF_JSAMPROW
+	add	edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
+	dec	ecx
+	jnz	short .convloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jpeg_quantize_flt_sse2 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                         FAST_FLOAT * workspace);
+;
+
+%define coef_block	ebp+8		; JCOEFPTR coef_block
+%define divisors	ebp+12		; FAST_FLOAT * divisors
+%define workspace	ebp+16		; FAST_FLOAT * workspace
+
+	align	16
+	global	EXTN(jpeg_quantize_flt_sse2)
+
+EXTN(jpeg_quantize_flt_sse2):
+	push	ebp
+	mov	ebp,esp
+;	push	ebx		; unused
+;	push	ecx		; unused
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, POINTER [workspace]	; esi -> workspace (input floats)
+	mov	edx, POINTER [divisors]		; edx -> divisors (multiplied in below)
+	mov	edi, JCOEFPTR [coef_block]	; edi -> coef_block (output)
+	mov	eax, DCTSIZE2/16		; loop count: 16 coefficients per pass
+	alignx	16,7
+.quantloop:
+	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+	movaps	xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]	; workspace * divisors
+	mulps	xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+	movaps	xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+	mulps	xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+	mulps	xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+	cvtps2dq xmm0,xmm0		; float -> int32 (4 lanes each)
+	cvtps2dq xmm1,xmm1
+	cvtps2dq xmm2,xmm2
+	cvtps2dq xmm3,xmm3
+
+	packssdw xmm0,xmm1		; int32 -> int16 with signed saturation
+	packssdw xmm2,xmm3
+
+	movdqa	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0	; store 8 coefficients each
+	movdqa	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2
+
+	add	esi, byte 16*SIZEOF_FAST_FLOAT
+	add	edx, byte 16*SIZEOF_FAST_FLOAT
+	add	edi, byte 16*SIZEOF_JCOEF
+	dec	eax
+	jnz	short .quantloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; unused
+;	pop	ebx		; unused
+	pop	ebp
+	ret
+
+%endif ; JFDCT_FLT_SSE_SSE2_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jcqnts2i.asm b/jcqnts2i.asm
new file mode 100644
index 0000000..71bae2c
--- /dev/null
+++ b/jcqnts2i.asm
@@ -0,0 +1,216 @@
+;
+; jcqnts2i.asm - sample data conversion and quantization (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 27, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef JFDCT_INT_SSE2_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_int_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                         DCTELEM * workspace);
+;
+
+%define sample_data	ebp+8		; JSAMPARRAY sample_data
+%define start_col	ebp+12		; JDIMENSION start_col
+%define workspace	ebp+16		; DCTELEM * workspace
+
+	align	16
+	global	EXTN(jpeg_convsamp_int_sse2)
+
+EXTN(jpeg_convsamp_int_sse2):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	pxor	xmm6,xmm6		; xmm6=(all 0's)
+	pcmpeqw	xmm7,xmm7
+	psllw	xmm7,7			; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
+	mov	eax, JDIMENSION [start_col]
+	mov	edi, POINTER [workspace]	; (DCTELEM *)
+	mov	ecx, DCTSIZE/4			; loop count: 4 rows per pass
+	alignx	16,7
+.convloop:
+	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+
+	movq	xmm0, _MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; xmm0=(01234567)
+	movq	xmm1, _MMWORD [edx+eax*SIZEOF_JSAMPLE]	; xmm1=(89ABCDEF)
+
+	mov	ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+	mov	edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+
+	movq	xmm2, _MMWORD [ebx+eax*SIZEOF_JSAMPLE]	; xmm2=(GHIJKLMN)
+	movq	xmm3, _MMWORD [edx+eax*SIZEOF_JSAMPLE]	; xmm3=(OPQRSTUV)
+
+	punpcklbw xmm0,xmm6		; xmm0=(01234567)
+	punpcklbw xmm1,xmm6		; xmm1=(89ABCDEF)
+	paddw	xmm0,xmm7		; add 0xFF80 (= -128) to each word: unsigned -> signed
+	paddw	xmm1,xmm7
+	punpcklbw xmm2,xmm6		; xmm2=(GHIJKLMN)
+	punpcklbw xmm3,xmm6		; xmm3=(OPQRSTUV)
+	paddw	xmm2,xmm7
+	paddw	xmm3,xmm7
+
+	movdqa	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0
+	movdqa	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
+	movdqa	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
+	movdqa	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+
+	add	esi, byte 4*SIZEOF_JSAMPROW
+	add	edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
+	dec	ecx
+	jnz	short .convloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+%ifndef JFDCT_INT_QUANTIZE_WITH_DIVISION
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jpeg_quantize_int_sse2 (JCOEFPTR coef_block, DCTELEM * divisors,
+;                         DCTELEM * workspace);
+;
+
+%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)	; sub-table 0 of divisors
+%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)	; sub-table 1 of divisors
+%define SCALE(m,n,b)      XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)	; sub-table 2 of divisors
+
+%define coef_block	ebp+8		; JCOEFPTR coef_block
+%define divisors	ebp+12		; DCTELEM * divisors
+%define workspace	ebp+16		; DCTELEM * workspace
+
+	align	16
+	global	EXTN(jpeg_quantize_int_sse2)
+
+EXTN(jpeg_quantize_int_sse2):
+	push	ebp
+	mov	ebp,esp
+;	push	ebx		; unused
+;	push	ecx		; unused
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, POINTER [workspace]	; esi -> workspace (input)
+	mov	edx, POINTER [divisors]		; edx -> divisors tables
+	mov	edi, JCOEFPTR [coef_block]	; edi -> coef_block (output)
+	mov	eax, DCTSIZE2/32		; loop count: 32 coefficients per pass
+	alignx	16,7
+.quantloop:
+	movdqa	xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+	movdqa	xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
+	movdqa	xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
+	movdqa	xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)]
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm5
+	movdqa	xmm2,xmm6
+	movdqa	xmm3,xmm7
+	psraw	xmm4,(WORD_BIT-1)	; xmm4..xmm7 = per-word sign masks, kept until the end
+	psraw	xmm5,(WORD_BIT-1)
+	psraw	xmm6,(WORD_BIT-1)
+	psraw	xmm7,(WORD_BIT-1)
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm5
+	pxor	xmm2,xmm6
+	pxor	xmm3,xmm7
+	psubw	xmm0,xmm4		; if (xmm0 < 0) xmm0 = -xmm0;
+	psubw	xmm1,xmm5		; if (xmm1 < 0) xmm1 = -xmm1;
+	psubw	xmm2,xmm6		; if (xmm2 < 0) xmm2 = -xmm2;
+	psubw	xmm3,xmm7		; if (xmm3 < 0) xmm3 = -xmm3;
+
+	paddw	xmm0, XMMWORD [CORRECTION(0,0,edx)]  ; correction + roundfactor
+	paddw	xmm1, XMMWORD [CORRECTION(1,0,edx)]
+	paddw	xmm2, XMMWORD [CORRECTION(2,0,edx)]
+	paddw	xmm3, XMMWORD [CORRECTION(3,0,edx)]
+	psllw	xmm0,1
+	psllw	xmm1,1
+	psllw	xmm2,1
+	psllw	xmm3,1
+	pmulhuw	xmm0, XMMWORD [RECIPROCAL(0,0,edx)]  ; reciprocal
+	pmulhuw	xmm1, XMMWORD [RECIPROCAL(1,0,edx)]
+	pmulhuw	xmm2, XMMWORD [RECIPROCAL(2,0,edx)]
+	pmulhuw	xmm3, XMMWORD [RECIPROCAL(3,0,edx)]
+	psllw	xmm0,1
+	psllw	xmm1,1
+	psllw	xmm2,1
+	psllw	xmm3,1
+	pmulhuw	xmm0, XMMWORD [SCALE(0,0,edx)]	; scale
+	pmulhuw	xmm1, XMMWORD [SCALE(1,0,edx)]
+	pmulhuw	xmm2, XMMWORD [SCALE(2,0,edx)]
+	pmulhuw	xmm3, XMMWORD [SCALE(3,0,edx)]
+
+	pxor	xmm0,xmm4		; re-apply the original signs
+	pxor	xmm1,xmm5
+	pxor	xmm2,xmm6
+	pxor	xmm3,xmm7
+	psubw	xmm0,xmm4
+	psubw	xmm1,xmm5
+	psubw	xmm2,xmm6
+	psubw	xmm3,xmm7
+	movdqa	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0	; NOTE(review): JCOEF store sized with SIZEOF_DCTELEM; presumably equal to SIZEOF_JCOEF - confirm
+	movdqa	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
+	movdqa	XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
+	movdqa	XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+
+	add	esi, byte 32*SIZEOF_DCTELEM
+	add	edx, byte 32*SIZEOF_DCTELEM
+	add	edi, byte 32*SIZEOF_JCOEF
+	dec	eax
+	jnz	near .quantloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; unused
+;	pop	ebx		; unused
+	pop	ebp
+	ret
+
+%endif ; !JFDCT_INT_QUANTIZE_WITH_DIVISION
+%endif ; JFDCT_INT_SSE2_SUPPORTED
diff --git a/jcqntsse.asm b/jcqntsse.asm
new file mode 100644
index 0000000..fe99a20
--- /dev/null
+++ b/jcqntsse.asm
@@ -0,0 +1,218 @@
+;
+; jcqntsse.asm - sample data conversion and quantization (SSE & MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 12, 2005
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_SSE_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jpeg_convsamp_flt_sse (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                        FAST_FLOAT * workspace);
+;
+
+%define sample_data	ebp+8		; JSAMPARRAY sample_data
+%define start_col	ebp+12		; JDIMENSION start_col
+%define workspace	ebp+16		; FAST_FLOAT * workspace
+
+	align	16
+	global	EXTN(jpeg_convsamp_flt_sse)
+
+EXTN(jpeg_convsamp_flt_sse):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	pcmpeqw	mm7,mm7
+	psllw	mm7,7
+	packsswb mm7,mm7		; mm7 = PB_CENTERJSAMPLE (0x808080..)
+
+	mov	esi, JSAMPARRAY [sample_data]	; (JSAMPROW *)
+	mov	eax, JDIMENSION [start_col]
+	mov	edi, POINTER [workspace]	; (FAST_FLOAT *)
+	mov	ecx, DCTSIZE/2			; loop count: 2 rows per pass
+	alignx	16,7
+.convloop:
+	mov	ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+	mov	edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]	; (JSAMPLE *)
+
+	movq	mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
+	movq	mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
+
+	psubb	mm0,mm7				; mm0=(01234567)
+	psubb	mm1,mm7				; mm1=(89ABCDEF)
+	; the '*' lanes below are don't-care; psrad shifts them out and sign-extends
+	punpcklbw mm2,mm0			; mm2=(*0*1*2*3)
+	punpckhbw mm0,mm0			; mm0=(*4*5*6*7)
+	punpcklbw mm3,mm1			; mm3=(*8*9*A*B)
+	punpckhbw mm1,mm1			; mm1=(*C*D*E*F)
+
+	punpcklwd mm4,mm2			; mm4=(***0***1)
+	punpckhwd mm2,mm2			; mm2=(***2***3)
+	punpcklwd mm5,mm0			; mm5=(***4***5)
+	punpckhwd mm0,mm0			; mm0=(***6***7)
+
+	psrad	mm4,(DWORD_BIT-BYTE_BIT)	; mm4=(01)
+	psrad	mm2,(DWORD_BIT-BYTE_BIT)	; mm2=(23)
+	cvtpi2ps xmm0,mm4			; xmm0=(01**)
+	cvtpi2ps xmm1,mm2			; xmm1=(23**)
+	psrad	mm5,(DWORD_BIT-BYTE_BIT)	; mm5=(45)
+	psrad	mm0,(DWORD_BIT-BYTE_BIT)	; mm0=(67)
+	cvtpi2ps xmm2,mm5			; xmm2=(45**)
+	cvtpi2ps xmm3,mm0			; xmm3=(67**)
+
+	punpcklwd mm6,mm3			; mm6=(***8***9)
+	punpckhwd mm3,mm3			; mm3=(***A***B)
+	punpcklwd mm4,mm1			; mm4=(***C***D)
+	punpckhwd mm1,mm1			; mm1=(***E***F)
+
+	psrad	mm6,(DWORD_BIT-BYTE_BIT)	; mm6=(89)
+	psrad	mm3,(DWORD_BIT-BYTE_BIT)	; mm3=(AB)
+	cvtpi2ps xmm4,mm6			; xmm4=(89**)
+	cvtpi2ps xmm5,mm3			; xmm5=(AB**)
+	psrad	mm4,(DWORD_BIT-BYTE_BIT)	; mm4=(CD)
+	psrad	mm1,(DWORD_BIT-BYTE_BIT)	; mm1=(EF)
+	cvtpi2ps xmm6,mm4			; xmm6=(CD**)
+	cvtpi2ps xmm7,mm1			; xmm7=(EF**)
+
+	movlhps	xmm0,xmm1			; xmm0=(0123)
+	movlhps	xmm2,xmm3			; xmm2=(4567)
+	movlhps	xmm4,xmm5			; xmm4=(89AB)
+	movlhps	xmm6,xmm7			; xmm6=(CDEF)
+
+	movaps	XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+	movaps	XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+	movaps	XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4
+	movaps	XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+
+	add	esi, byte 2*SIZEOF_JSAMPROW
+	add	edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
+	dec	ecx
+	jnz	near .convloop
+
+	emms		; empty MMX state
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jpeg_quantize_flt_sse (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                        FAST_FLOAT * workspace);
+;
+
+%define coef_block	ebp+8		; JCOEFPTR coef_block
+%define divisors	ebp+12		; FAST_FLOAT * divisors
+%define workspace	ebp+16		; FAST_FLOAT * workspace
+
+	align	16
+	global	EXTN(jpeg_quantize_flt_sse)
+
+EXTN(jpeg_quantize_flt_sse):
+	push	ebp
+	mov	ebp,esp
+;	push	ebx		; unused
+;	push	ecx		; unused
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	esi, POINTER [workspace]	; esi -> workspace (input floats)
+	mov	edx, POINTER [divisors]		; edx -> divisors (multiplied in below)
+	mov	edi, JCOEFPTR [coef_block]	; edi -> coef_block (output)
+	mov	eax, DCTSIZE2/16		; loop count: 16 coefficients per pass
+	alignx	16,7
+.quantloop:
+	movaps	xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+	movaps	xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+	mulps	xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]	; workspace * divisors
+	mulps	xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+	movaps	xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+	movaps	xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+	mulps	xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+	mulps	xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+	movhlps	xmm4,xmm0		; bring the upper two floats down for cvtps2pi
+	movhlps	xmm5,xmm1
+
+	cvtps2pi mm0,xmm0		; two low floats -> two int32 in an MMX register
+	cvtps2pi mm1,xmm1
+	cvtps2pi mm4,xmm4
+	cvtps2pi mm5,xmm5
+
+	movhlps	xmm6,xmm2
+	movhlps	xmm7,xmm3
+
+	cvtps2pi mm2,xmm2
+	cvtps2pi mm3,xmm3
+	cvtps2pi mm6,xmm6
+	cvtps2pi mm7,xmm7
+
+	packssdw mm0,mm4		; int32 -> int16 with signed saturation
+	packssdw mm1,mm5
+	packssdw mm2,mm6
+	packssdw mm3,mm7
+
+	movq	MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0	; store 4 coefficients each
+	movq	MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
+	movq	MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2
+	movq	MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
+
+	add	esi, byte 16*SIZEOF_FAST_FLOAT
+	add	edx, byte 16*SIZEOF_FAST_FLOAT
+	add	edi, byte 16*SIZEOF_JCOEF
+	dec	eax
+	jnz	short .quantloop
+
+	emms		; empty MMX state
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; unused
+;	pop	ebx		; unused
+	pop	ebp
+	ret
+
+%endif ; JFDCT_FLT_SSE_MMX_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jcsammmx.asm b/jcsammmx.asm
new file mode 100644
index 0000000..95fc825
--- /dev/null
+++ b/jcsammmx.asm
@@ -0,0 +1,328 @@
+;
+; jcsammmx.asm - downsampling (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 23, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%ifdef JCSAMPLE_MMX_SUPPORTED
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Downsample pixel values of a single component.
+; This version handles the common case of 2:1 horizontal and 1:1 vertical,
+; without smoothing.
+; Each output sample is (left + right + bias) >> 1, bias alternating 0,1.
+; GLOBAL(void)
+; jpeg_h2v1_downsample_mmx (j_compress_ptr cinfo,
+;                           jpeg_component_info * compptr,
+;                           JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Input rows are first padded on the right by replicating the last pixel.
+
+%define cinfo(b)        (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)      (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)   (b)+16          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+20          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jpeg_h2v1_downsample_mmx)
+
+EXTN(jpeg_h2v1_downsample_mmx):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, POINTER [compptr(ebp)]
+        mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; nothing to do for zero output columns
+
+        mov     edx, POINTER [cinfo(ebp)]
+        mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = number of padding columns
+        jle     short .expand_end               ; no padding needed
+
+        mov     eax, POINTER [cinfo(ebp)]
+        mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; rows to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; stosb must move forward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                             ; al is reused as the fill value
+        push    ecx                             ; rep consumes ecx
+
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx                         ; first byte past the real pixels
+        mov     al, JSAMPLE [edi-1]             ; last real pixel of the row
+
+        rep stosb                               ; replicate it ecx times
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v1_downsample
+
+        mov     eax, POINTER [compptr(ebp)]
+        mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+        test    eax,eax
+        jle     short .return
+
+        mov     edx, 0x00010000         ; bias pattern
+        movd    mm7,edx
+        pcmpeqw mm6,mm6
+        punpckldq mm7,mm7               ; mm7={0, 1, 0, 1}
+        psrlw   mm6,BYTE_BIT            ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx
+        push    edi
+        push    esi
+
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]       ; 16 input samples ...
+        movq    mm1, MMWORD [esi+1*SIZEOF_MMWORD]       ; ... give 8 output samples
+        movq    mm2,mm0
+        movq    mm3,mm1
+
+        pand    mm0,mm6                 ; even-indexed samples as words
+        psrlw   mm2,BYTE_BIT            ; odd-indexed samples as words
+        pand    mm1,mm6
+        psrlw   mm3,BYTE_BIT
+
+        paddw   mm0,mm2                 ; sum of each horizontal pair
+        paddw   mm1,mm3
+        paddw   mm0,mm7                 ; add the alternating bias
+        paddw   mm1,mm7
+        psrlw   mm0,1                   ; divide by 2
+        psrlw   mm1,1
+
+        packuswb mm0,mm1                ; back to 8 bytes
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+        add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+        sub     ecx, byte SIZEOF_MMWORD         ; outcol
+        jnz     short .columnloop
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     eax                             ; rowctr
+        jg      short .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+; without smoothing.
+; Each output sample is (sum of a 2x2 input square + bias) >> 2, bias 1,2,1,2.
+; GLOBAL(void)
+; jpeg_h2v2_downsample_mmx (j_compress_ptr cinfo,
+;                           jpeg_component_info * compptr,
+;                           JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Input rows are first padded on the right by replicating the last pixel.
+
+%define cinfo(b)        (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)      (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)   (b)+16          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+20          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jpeg_h2v2_downsample_mmx)
+
+EXTN(jpeg_h2v2_downsample_mmx):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, POINTER [compptr(ebp)]
+        mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; nothing to do for zero output columns
+
+        mov     edx, POINTER [cinfo(ebp)]
+        mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = number of padding columns
+        jle     short .expand_end               ; no padding needed
+
+        mov     eax, POINTER [cinfo(ebp)]
+        mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; rows to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; stosb must move forward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                             ; al is reused as the fill value
+        push    ecx                             ; rep consumes ecx
+
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx                         ; first byte past the real pixels
+        mov     al, JSAMPLE [edi-1]             ; last real pixel of the row
+
+        rep stosb                               ; replicate it ecx times
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v2_downsample
+
+        mov     eax, POINTER [compptr(ebp)]
+        mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+        test    eax,eax
+        jle     near .return
+
+        mov     edx, 0x00020001         ; bias pattern
+        movd    mm7,edx
+        pcmpeqw mm6,mm6
+        punpckldq mm7,mm7               ; mm7={1, 2, 1, 2}
+        psrlw   mm6,BYTE_BIT            ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx
+        push    edi
+        push    esi
+
+        mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     edi, JSAMPROW [edi]                     ; outptr
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [edx+0*SIZEOF_MMWORD]       ; upper row, first 8 samples
+        movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]       ; lower row, first 8 samples
+        movq    mm2, MMWORD [edx+1*SIZEOF_MMWORD]       ; upper row, next 8 samples
+        movq    mm3, MMWORD [esi+1*SIZEOF_MMWORD]       ; lower row, next 8 samples
+
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pand    mm0,mm6                 ; even-indexed samples as words
+        psrlw   mm4,BYTE_BIT            ; odd-indexed samples as words
+        pand    mm1,mm6
+        psrlw   mm5,BYTE_BIT
+        paddw   mm0,mm4                 ; horizontal pair sums, upper row
+        paddw   mm1,mm5                 ; horizontal pair sums, lower row
+
+        movq    mm4,mm2
+        movq    mm5,mm3
+        pand    mm2,mm6
+        psrlw   mm4,BYTE_BIT
+        pand    mm3,mm6
+        psrlw   mm5,BYTE_BIT
+        paddw   mm2,mm4
+        paddw   mm3,mm5
+
+        paddw   mm0,mm1                 ; add the two rows: 2x2 sums
+        paddw   mm2,mm3
+        paddw   mm0,mm7                 ; add the alternating bias
+        paddw   mm2,mm7
+        psrlw   mm0,2                   ; divide by 4
+        psrlw   mm2,2
+
+        packuswb mm0,mm2                ; back to 8 bytes
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+        add     edx, byte 2*SIZEOF_MMWORD       ; inptr0
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr1
+        add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+        sub     ecx, byte SIZEOF_MMWORD         ; outcol
+        jnz     near .columnloop
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+%endif ; JCSAMPLE_MMX_SUPPORTED
diff --git a/jcsample.c b/jcsample.c
index 212ec87..9af7f15 100644
--- a/jcsample.c
+++ b/jcsample.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
+ * ---------------------------------------------------------------------
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * This file has been modified for SIMD extension.
+ * Last Modified : January 5, 2006
+ * ---------------------------------------------------------------------
+ *
  * This file contains downsampling routines.
  *
  * Downsampling input data is counted in "row groups".
A row group
@@ -48,6 +55,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jcolsamp.h"		/* Private declarations */
 
 
 /* Pointer to routine to downsample a single component */
@@ -467,6 +475,7 @@ jinit_downsampler (j_compress_ptr cinfo)
   int ci;
   jpeg_component_info * compptr;
   boolean smoothok = TRUE;
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);	/* JSIMD_* bit mask */
 
   downsample = (my_downsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -494,7 +503,17 @@ jinit_downsampler (j_compress_ptr cinfo)
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
 	       compptr->v_samp_factor == cinfo->max_v_samp_factor) {
       smoothok = FALSE;
-      downsample->methods[ci] = h2v1_downsample;
+#ifdef JCSAMPLE_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2)	/* SSE2 is preferred over MMX */
+	downsample->methods[ci] = jpeg_h2v1_downsample_sse2;
+      else
+#endif
+#ifdef JCSAMPLE_MMX_SUPPORTED
+      if (simd & JSIMD_MMX)
+	downsample->methods[ci] = jpeg_h2v1_downsample_mmx;
+      else
+#endif
+	downsample->methods[ci] = h2v1_downsample;	/* plain C fallback */
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
 	       compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
 #ifdef INPUT_SMOOTHING_SUPPORTED
@@ -502,6 +521,16 @@ jinit_downsampler (j_compress_ptr cinfo)
 	downsample->methods[ci] = h2v2_smooth_downsample;
 	downsample->pub.need_context_rows = TRUE;
       } else
+#endif
+#ifdef JCSAMPLE_SSE2_SUPPORTED
+      if (simd & JSIMD_SSE2)	/* SIMD is only used on the non-smoothing path */
+	downsample->methods[ci] = jpeg_h2v2_downsample_sse2;
+      else
+#endif
+#ifdef JCSAMPLE_MMX_SUPPORTED
+      if (simd & JSIMD_MMX)
+	downsample->methods[ci] = jpeg_h2v2_downsample_mmx;
+      else
 #endif
 	downsample->methods[ci] = h2v2_downsample;
     } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
@@ -517,3 +546,25 @@ jinit_downsampler (j_compress_ptr cinfo)
     TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
 #endif
 }
+
+
+#ifndef JSIMD_MODEINFO_NOT_SUPPORTED
+/* Return the best compiled-in SIMD level reported by jpeg_simd_support(). */
+GLOBAL(unsigned int)
+jpeg_simd_downsampler (j_compress_ptr cinfo)
+{
+  unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo);
+
+#ifdef JCSAMPLE_SSE2_SUPPORTED
+  if (simd & JSIMD_SSE2)	/* same priority order as jinit_downsampler */
+    return JSIMD_SSE2;
+#endif
+#ifdef JCSAMPLE_MMX_SUPPORTED
+  if (simd & JSIMD_MMX)
+    return JSIMD_MMX;
+#endif
+
+  return JSIMD_NONE;	/* neither variant is compiled in and available */
+}
+
+#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */
diff --git a/jcsamss2.asm b/jcsamss2.asm
new file mode 100644
index 0000000..e187d63
--- /dev/null
+++ b/jcsamss2.asm
@@ -0,0 +1,355 @@
+;
+; jcsamss2.asm - downsampling (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : January 23, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jcolsamp.inc"
+
+%ifdef JCSAMPLE_SSE2_SUPPORTED
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Downsample pixel values of a single component.
+; This version handles the common case of 2:1 horizontal and 1:1 vertical,
+; without smoothing.
+; Each output sample is (left + right + bias) >> 1, bias alternating 0,1.
+; GLOBAL(void)
+; jpeg_h2v1_downsample_sse2 (j_compress_ptr cinfo,
+;                            jpeg_component_info * compptr,
+;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Rows are accessed with movdqa, so row pointers must be 16-byte aligned.
+
+%define cinfo(b)        (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)      (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)   (b)+16          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+20          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jpeg_h2v1_downsample_sse2)
+
+EXTN(jpeg_h2v1_downsample_sse2):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, POINTER [compptr(ebp)]
+        mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; nothing to do for zero output columns
+
+        mov     edx, POINTER [cinfo(ebp)]
+        mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = number of padding columns
+        jle     short .expand_end               ; no padding needed
+
+        mov     eax, POINTER [cinfo(ebp)]
+        mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; rows to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; stosb must move forward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                             ; al is reused as the fill value
+        push    ecx                             ; rep consumes ecx
+
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx                         ; first byte past the real pixels
+        mov     al, JSAMPLE [edi-1]             ; last real pixel of the row
+
+        rep stosb                               ; replicate it ecx times
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v1_downsample
+
+        mov     eax, POINTER [compptr(ebp)]
+        mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+        test    eax,eax
+        jle     near .return
+
+        mov     edx, 0x00010000         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx
+        push    edi
+        push    esi
+
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop               ; at least 16 output columns left
+        alignx  16,7
+
+.columnloop_r8:                                 ; tail: fewer than 16 columns left
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        pxor    xmm1,xmm1                       ; second input half treated as zero
+        mov     ecx, SIZEOF_XMMWORD             ; so the "sub" below ends the row
+        jmp     short .downsample
+        alignx  16,7
+
+.columnloop:
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; 32 input samples ...
+        movdqa  xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; ... give 16 output samples
+
+.downsample:
+        movdqa  xmm2,xmm0
+        movdqa  xmm3,xmm1
+
+        pand    xmm0,xmm6               ; even-indexed samples as words
+        psrlw   xmm2,BYTE_BIT           ; odd-indexed samples as words
+        pand    xmm1,xmm6
+        psrlw   xmm3,BYTE_BIT
+
+        paddw   xmm0,xmm2               ; sum of each horizontal pair
+        paddw   xmm1,xmm3
+        paddw   xmm0,xmm7               ; add the alternating bias
+        paddw   xmm1,xmm7
+        psrlw   xmm0,1                  ; divide by 2
+        psrlw   xmm1,1
+
+        packuswb xmm0,xmm1              ; back to 16 bytes
+
+        ; a full XMMWORD is stored even on the tail path (upper 8 bytes are zero)
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+
+        sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+        test    ecx,ecx
+        jnz     short .columnloop_r8            ; leftover columns: take the tail path
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+; without smoothing.
+; Each output sample is (sum of a 2x2 input square + bias) >> 2, bias 1,2,1,2.
+; GLOBAL(void)
+; jpeg_h2v2_downsample_sse2 (j_compress_ptr cinfo,
+;                            jpeg_component_info * compptr,
+;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
+; Rows are accessed with movdqa, so row pointers must be 16-byte aligned.
+
+%define cinfo(b)        (b)+8           ; j_compress_ptr cinfo
+%define compptr(b)      (b)+12          ; jpeg_component_info * compptr
+%define input_data(b)   (b)+16          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+20          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jpeg_h2v2_downsample_sse2)
+
+EXTN(jpeg_h2v2_downsample_sse2):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, POINTER [compptr(ebp)]
+        mov     ecx, JDIMENSION [jcompinfo_width_in_blocks(ecx)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; nothing to do for zero output columns
+
+        mov     edx, POINTER [cinfo(ebp)]
+        mov     edx, JDIMENSION [jcstruct_image_width(edx)]     ; edx = image_width
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = number of padding columns
+        jle     short .expand_end               ; no padding needed
+
+        mov     eax, POINTER [cinfo(ebp)]
+        mov     eax, INT [jcstruct_max_v_samp_factor(eax)]      ; rows to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; stosb must move forward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                             ; al is reused as the fill value
+        push    ecx                             ; rep consumes ecx
+
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx                         ; first byte past the real pixels
+        mov     al, JSAMPLE [edi-1]             ; last real pixel of the row
+
+        rep stosb                               ; replicate it ecx times
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v2_downsample
+
+        mov     eax, POINTER [compptr(ebp)]
+        mov     eax, JDIMENSION [jcompinfo_v_samp_factor(eax)]  ; rowctr
+        test    eax,eax
+        jle     near .return
+
+        mov     edx, 0x00020001         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx
+        push    edi
+        push    esi
+
+        mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     edi, JSAMPROW [edi]                     ; outptr
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop               ; at least 16 output columns left
+        alignx  16,7
+
+.columnloop_r8:                                 ; tail: fewer than 16 columns left
+        movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        pxor    xmm2,xmm2                       ; second input half treated as zero
+        pxor    xmm3,xmm3
+        mov     ecx, SIZEOF_XMMWORD             ; so the "sub" below ends the row
+        jmp     short .downsample
+        alignx  16,7
+
+.columnloop:
+        movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]    ; upper row, first 16 samples
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; lower row, first 16 samples
+        movdqa  xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]    ; upper row, next 16 samples
+        movdqa  xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; lower row, next 16 samples
+
+.downsample:
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        pand    xmm0,xmm6               ; even-indexed samples as words
+        psrlw   xmm4,BYTE_BIT           ; odd-indexed samples as words
+        pand    xmm1,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm0,xmm4               ; horizontal pair sums, upper row
+        paddw   xmm1,xmm5               ; horizontal pair sums, lower row
+
+        movdqa  xmm4,xmm2
+        movdqa  xmm5,xmm3
+        pand    xmm2,xmm6
+        psrlw   xmm4,BYTE_BIT
+        pand    xmm3,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm2,xmm4
+        paddw   xmm3,xmm5
+
+        paddw   xmm0,xmm1               ; add the two rows: 2x2 sums
+        paddw   xmm2,xmm3
+        paddw   xmm0,xmm7               ; add the alternating bias
+        paddw   xmm2,xmm7
+        psrlw   xmm0,2                  ; divide by 4
+        psrlw   xmm2,2
+
+        packuswb xmm0,xmm2              ; back to 16 bytes
+
+        ; a full XMMWORD is stored even on the tail path (upper 8 bytes are zero)
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+
+        sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+        add     edx, byte 2*SIZEOF_XMMWORD      ; inptr0
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr1
+        add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .columnloop_r8             ; leftover columns: take the tail path
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+%endif ; JCSAMPLE_SSE2_SUPPORTED
diff --git a/jdcoefct.c b/jdcoefct.c
index 4938d20..1a515d3 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -5,6 +5,13 @@
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
* + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified to improve performance. + * Last Modified : December 18, 2005 + * --------------------------------------------------------------------- + * * This file contains the coefficient buffer controller for decompression. * This controller is the top level of the JPEG decompressor proper. * The coefficient buffer lies between entropy decoding and inverse-DCT steps. @@ -133,6 +140,11 @@ start_output_pass (j_decompress_ptr cinfo) } +#ifndef NEED_FAR_POINTERS +#undef jzero_far +#define jzero_far(target, bytestozero) MEMZERO(target, bytestozero) +#endif + /* * Decompress and return some data in the single-pass case. * Always attempts to emit one fully interleaved MCU row ("iMCU" row). @@ -150,15 +162,61 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; - int blkn, ci, xindex, yindex, yoffset, useful_width; + int blkn, ci, ctr, xindex, yindex, yoffset; JSAMPARRAY output_ptr; - JDIMENSION start_col, output_col; + JDIMENSION output_col; jpeg_component_info *compptr; inverse_DCT_method_ptr inverse_DCT; + JSAMPARRAY output_ptr_blk[D_MAX_BLOCKS_IN_MCU]; + JDIMENSION output_col_off[D_MAX_BLOCKS_IN_MCU]; + jpeg_component_info *compptr_blk[D_MAX_BLOCKS_IN_MCU]; + inverse_DCT_method_ptr inverse_DCT_blk_1[D_MAX_BLOCKS_IN_MCU]; + inverse_DCT_method_ptr inverse_DCT_blk_2[D_MAX_BLOCKS_IN_MCU]; + inverse_DCT_method_ptr *inverse_DCT_blk; /* Loop to process as much as one whole iMCU row */ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { + /* Determine where data should go in output_buf and do the IDCT thing. 
+ * We skip dummy blocks at the right and bottom edges (but blkn gets + * incremented past them!). Note the inner loop relies on having + * allocated the MCU_buffer[] blocks sequentially. + */ + blkn = 0; /* index of current DCT block within MCU */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* Don't bother to IDCT an uninteresting component. */ + if (! compptr->component_needed) { + for (ctr = compptr->MCU_blocks; ctr > 0; ctr--) { + inverse_DCT_blk_1[blkn] = inverse_DCT_blk_2[blkn] = NULL; + blkn++; + } + continue; + } + inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index]; + output_ptr = output_buf[compptr->component_index] + + yoffset * compptr->DCT_scaled_size; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + if (cinfo->input_iMCU_row < last_iMCU_row || + yoffset+yindex < compptr->last_row_height) { + for (xindex = 0; xindex < compptr->MCU_width; xindex++) { + compptr_blk[blkn] = compptr; + output_ptr_blk[blkn] = output_ptr; + output_col_off[blkn] = xindex * compptr->DCT_scaled_size; + inverse_DCT_blk_1[blkn] = inverse_DCT; + inverse_DCT_blk_2[blkn] = (xindex < compptr->last_col_width) ? + inverse_DCT : NULL; + blkn++; + } + } else { + for (ctr = compptr->MCU_width; ctr > 0; ctr--) { + inverse_DCT_blk_1[blkn] = inverse_DCT_blk_2[blkn] = NULL; + blkn++; + } + } + output_ptr += compptr->DCT_scaled_size; + } + } for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col; MCU_col_num++) { /* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */ @@ -170,39 +228,17 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) coef->MCU_ctr = MCU_col_num; return JPEG_SUSPENDED; } - /* Determine where data should go in output_buf and do the IDCT thing. - * We skip dummy blocks at the right and bottom edges (but blkn gets - * incremented past them!). Note the inner loop relies on having - * allocated the MCU_buffer[] blocks sequentially. 
- */ - blkn = 0; /* index of current DCT block within MCU */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - /* Don't bother to IDCT an uninteresting component. */ - if (! compptr->component_needed) { - blkn += compptr->MCU_blocks; + inverse_DCT_blk = (MCU_col_num < last_MCU_col) ? inverse_DCT_blk_1 + : inverse_DCT_blk_2; + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + inverse_DCT = inverse_DCT_blk[blkn]; + if (inverse_DCT == NULL) continue; - } - inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index]; - useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; - output_ptr = output_buf[compptr->component_index] + - yoffset * compptr->DCT_scaled_size; - start_col = MCU_col_num * compptr->MCU_sample_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - if (cinfo->input_iMCU_row < last_iMCU_row || - yoffset+yindex < compptr->last_row_height) { - output_col = start_col; - for (xindex = 0; xindex < useful_width; xindex++) { - (*inverse_DCT) (cinfo, compptr, - (JCOEFPTR) coef->MCU_buffer[blkn+xindex], - output_ptr, output_col); - output_col += compptr->DCT_scaled_size; - } - } - blkn += compptr->MCU_width; - output_ptr += compptr->DCT_scaled_size; - } + compptr = compptr_blk[blkn]; + output_col = MCU_col_num * compptr->MCU_sample_width + + output_col_off[blkn]; + (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) coef->MCU_buffer[blkn], + output_ptr_blk[blkn], output_col); } } /* Completed an MCU row, but perhaps not an iMCU row */ @@ -250,6 +286,8 @@ consume_data (j_decompress_ptr cinfo) JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN]; JBLOCKROW buffer_ptr; jpeg_component_info *compptr; + int MCU_width[D_MAX_BLOCKS_IN_MCU]; + JBLOCKROW MCU_buffer_base[D_MAX_BLOCKS_IN_MCU]; /* Align the virtual buffers for the components used in this scan. 
*/ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { @@ -267,19 +305,24 @@ consume_data (j_decompress_ptr cinfo) /* Loop to process one whole iMCU row */ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; yoffset++) { + /* Construct list of pointers to DCT blocks belonging to this MCU */ + blkn = 0; /* index of current DCT block within MCU */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + for (yindex = 0; yindex < compptr->MCU_height; yindex++) { + buffer_ptr = buffer[ci][yindex+yoffset]; + for (xindex = 0; xindex < compptr->MCU_width; xindex++) { + MCU_width[blkn] = compptr->MCU_width; + MCU_buffer_base[blkn] = buffer_ptr++; + blkn++; + } + } + } for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row; MCU_col_num++) { - /* Construct list of pointers to DCT blocks belonging to this MCU */ - blkn = 0; /* index of current DCT block within MCU */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - start_col = MCU_col_num * compptr->MCU_width; - for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; - for (xindex = 0; xindex < compptr->MCU_width; xindex++) { - coef->MCU_buffer[blkn++] = buffer_ptr++; - } - } + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + start_col = MCU_col_num * MCU_width[blkn]; + coef->MCU_buffer[blkn] = MCU_buffer_base[blkn] + start_col; } /* Try to fetch the MCU. */ if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { @@ -453,6 +496,15 @@ smoothing_ok (j_decompress_ptr cinfo) } +/* + * SIMD Ext: Most of SSE/SSE2 instructions require that the memory address + * is aligned to a 16-byte boundary; if not, a general-protection exception + * (#GP) is generated. + */ + +#define ALIGN_SIZE 16 /* sizeof SSE/SSE2 register */ +#define ALIGN_MEM(p,a) ((void *) (((size_t) (p) + (a) - 1) & -(a))) + /* * Variant of decompress_data for use when doing block smoothing. 
*/ @@ -471,7 +523,8 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) jpeg_component_info *compptr; inverse_DCT_method_ptr inverse_DCT; boolean first_row, last_row; - JBLOCK workspace; + JCOEF workspace[DCTSIZE2 + ALIGN_SIZE/sizeof(JCOEF)]; + JCOEF * workptr = (JCOEF *) ALIGN_MEM(workspace, ALIGN_SIZE); int *coef_bits; JQUANT_TBL *quanttbl; INT32 Q00,Q01,Q02,Q10,Q11,Q20, num; @@ -560,7 +613,7 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) last_block_column = compptr->width_in_blocks - 1; for (block_num = 0; block_num <= last_block_column; block_num++) { /* Fetch current DCT block into workspace so we can modify it. */ - jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1); + jcopy_block_row(buffer_ptr, (JBLOCKROW) workptr, (JDIMENSION) 1); /* Update DC values */ if (block_num < last_block_column) { DC3 = (int) prev_block_row[1][0]; @@ -572,7 +625,7 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) * and is not known to be fully accurate. 
*/ /* AC01 */ - if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) { + if ((Al=coef_bits[1]) != 0 && workptr[1] == 0) { num = 36 * Q00 * (DC4 - DC6); if (num >= 0) { pred = (int) (((Q01<<7) + num) / (Q01<<8)); @@ -584,10 +637,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) pred = (1<= 0) { pred = (int) (((Q10<<7) + num) / (Q10<<8)); @@ -599,10 +652,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) pred = (1<= 0) { pred = (int) (((Q20<<7) + num) / (Q20<<8)); @@ -614,10 +667,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) pred = (1<= 0) { pred = (int) (((Q11<<7) + num) / (Q11<<8)); @@ -629,10 +682,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) pred = (1<= 0) { pred = (int) (((Q02<<7) + num) / (Q02<<8)); @@ -644,10 +697,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) pred = (1<mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -358,8 +384,23 @@ jinit_color_deconverter (j_decompress_ptr cinfo) case JCS_RGB: cinfo->out_color_components = RGB_PIXELSIZE; if (cinfo->jpeg_color_space == JCS_YCbCr) { - cconvert->pub.color_convert = ycc_rgb_convert; - build_ycc_rgb_table(cinfo); +#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +#ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_ycc_rgb_convert_sse2)) { + cconvert->pub.color_convert = jpeg_ycc_rgb_convert_sse2; + } else +#endif +#ifdef JDCOLOR_YCCRGB_MMX_SUPPORTED + if (simd & JSIMD_MMX) { + cconvert->pub.color_convert = jpeg_ycc_rgb_convert_mmx; + } else +#endif +#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */ + { + cconvert->pub.color_convert = ycc_rgb_convert; + build_ycc_rgb_table(cinfo); + } } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { cconvert->pub.color_convert = gray_rgb_convert; } else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) { @@ -394,3 +435,28 @@ jinit_color_deconverter (j_decompress_ptr cinfo) 
else cinfo->output_components = cinfo->out_color_components; } + + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +GLOBAL(unsigned int) +jpeg_simd_color_deconverter (j_decompress_ptr cinfo) +{ + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); + +#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +#ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_ycc_rgb_convert_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JDCOLOR_YCCRGB_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif +#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */ + + return JSIMD_NONE; +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ diff --git a/jdcolss2.asm b/jdcolss2.asm new file mode 100644 index 0000000..fd6f04d --- /dev/null +++ b/jdcolss2.asm @@ -0,0 +1,536 @@ +; +; jdcolss2.asm - colorspace conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jcolsamp.inc" + +%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +%ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 +; FIX(x) = x * (1 << SCALEBITS), rounded to the nearest integer; 65536 = FIX(1), 131072 = FIX(2) +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) +; The derived F_0_402 / F_0_285 / F_0_228 fit in a signed 16-bit word (F_1_402, F_0_714 and F_1_772 do not), as the dw tables below require +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_ycc_rgb_convert_sse2) + +EXTN(jconst_ycc_rgb_convert_sse2): +; Packed 16-byte constants; the column loop uses them as memory operands of pmulhw/pmaddwd/paddw/paddd +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Convert some rows of samples to the output colorspace.
+; +; GLOBAL(void) +; jpeg_ycc_rgb_convert_sse2 (j_decompress_ptr cinfo, +; JSAMPIMAGE input_buf, JDIMENSION input_row, +; JSAMPARRAY output_buf, int num_rows) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define input_row(b) (b)+16 ; JDIMENSION input_row +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define num_rows(b) (b)+24 ; int num_rows +; Locals are addressed from the realigned ebp built in the prologue: [ebp+0] = saved frame base, wk(0..WK_NUM-1) = xmmword scratch below it, gotptr just below wk(0) +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_ycc_rgb_convert_sse2) + +EXTN(jpeg_ycc_rgb_convert_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + ; ecx = num_cols; leave at once when there are no columns to convert + mov ecx, POINTER [cinfo(eax)] + mov ecx, JDIMENSION [jdstruct_output_width(ecx)] ; num_cols + test ecx,ecx + jz near .return + ; park num_cols: ecx doubles as input_row while the three row-array pointers are computed + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + ; eax (the argument base) is overwritten by num_rows below, so output_buf is fetched first + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax,eax + jle near .return + alignx 16,7 +.rowloop: + push eax + push edi + push edx + push ebx + push esi + push ecx ; col + ; dereference the row arrays: from here esi/ebx/edx/edi point at sample data, not at row pointers + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16,7
+.columnloop: + ; 16 pixels per pass; movdqa faults on unaligned addresses, so the Cb/Cr/Y rows must be 16-byte aligned + movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF) + movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + pcmpeqw xmm7,xmm7 + psrlw xmm4,BYTE_BIT + psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} + + pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE + psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO + pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE + psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO + ; xmm7 = -128 in every word: re-center the Cb/Cr samples around zero + paddw xmm4,xmm7 + paddw xmm5,xmm7 + paddw xmm0,xmm7 + paddw xmm1,xmm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm2,xmm4 ; xmm2=CbE + movdqa xmm3,xmm5 ; xmm3=CbO + paddw xmm4,xmm4 ; xmm4=2*CbE + paddw xmm5,xmm5 ; xmm5=2*CbO + movdqa xmm6,xmm0 ; xmm6=CrE + movdqa xmm7,xmm1 ; xmm7=CrO + paddw xmm0,xmm0 ; xmm0=2*CrE + paddw xmm1,xmm1 ; xmm1=2*CrO + ; pmulhw keeps the high 16 bits of each product, i.e. (2*C * FIX(k)) >> 16; the +1 and >>1 that follow undo the doubling with rounding + pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5,[GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1,[GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200)) + + paddw xmm4,[GOTOFF(eax,PW_ONE)] + paddw xmm5,[GOTOFF(eax,PW_ONE)] + psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) + psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) + paddw xmm0,[GOTOFF(eax,PW_ONE)] + paddw xmm1,[GOTOFF(eax,PW_ONE)] + psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) + psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) + + paddw xmm4,xmm2 + paddw xmm5,xmm3 + paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E + paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O + paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E + paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O + + movdqa xmm4,xmm2
+ movdqa xmm5,xmm3 + punpcklwd xmm2,xmm6 + punpckhwd xmm4,xmm6 + pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm4,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm3,xmm7 + punpckhwd xmm5,xmm7 + pmaddwd xmm3,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] + ; add 1/2 (PD_ONEHALF) and shift right by SCALEBITS: round the 32-bit sums to integers + paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm4,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm2,SCALEBITS + psrad xmm4,SCALEBITS + paddd xmm3,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm3,SCALEBITS + psrad xmm5,SCALEBITS + + packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF) + + pcmpeqw xmm4,xmm4 + psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} + pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE + psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO + ; packuswb saturates each word to 0..255, so no separate range-limit step is needed + paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) + paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) + paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) + paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + ; xmmA..xmmH are register aliases defined outside this file (presumably jcolsamp.inc -- confirm); in the byte maps, first digit = output component, second = pixel + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ;
xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + ; xmmA, xmmD, xmmF now hold 16 interleaved pixels (48 bytes); ecx = columns still to do + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 ; fewer than 16 columns left: partial store + + test edi,
SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + jmp short .out0 +.out1: ; --(unaligned)----------------- + pcmpeqb xmmH,xmmH ; xmmH=(all 1's) + maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF + add edi, byte SIZEOF_XMMWORD ; outptr +.out0: + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + pcmpeqb xmmH,xmmH ; xmmH=(all 1's) + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + mov eax,ecx ; eax = bytes still to store (fewer than 16) + xor ecx, byte 0x0F ; ecx = 15 - bytes + shl ecx, 2 ; times 4 = shift count in bits + movd xmmB,ecx + psrlq xmmH,4 + pcmpeqb xmmE,xmmE + psrlq xmmH,xmmB + psrlq xmmE,xmmB + punpcklbw xmmE,xmmH ; xmmE = store mask: MSB set only in the first eax bytes + ; ---------------- + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0 ; outptr is already 16-byte aligned + add eax,ecx + cmp eax, byte SIZEOF_XMMWORD + ja short .adj0 ; bytes run past the next 16-byte boundary: store from edi as is + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx + movdqa xmmG,xmmA + movdqa xmmC,xmmE + pslldq xmmA, SIZEOF_XMMWORD/2
+ pslldq xmmE, SIZEOF_XMMWORD/2 + movd xmmD,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1 ; shift is under 8 bytes + movd xmmF,ecx + psllq xmmA,xmmF + psllq xmmE,xmmF + jmp short .adj0 +.adj1: neg ecx ; OR the per-qword left shift with the bits carried from the low into the high qword + movd xmmF,ecx + psrlq xmmA,xmmF + psrlq xmmE,xmmF + psllq xmmG,xmmD + psllq xmmC,xmmD + por xmmA,xmmG + por xmmE,xmmC +.adj0: ; ---------------- + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + +%else ; RGB_PIXELSIZE == 4 ; ----------- + ; filler component (X) of every pixel: 0xFF when RGBX_FILLER_0XFF is defined, otherwise 0x00 +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) +
+ cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + jmp short .out0 +.out1: ; --(unaligned)----------------- + pcmpeqb xmmE,xmmE ; xmmE=(all 1's) + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH + add edi, byte SIZEOF_XMMWORD ; outptr +.out0: + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + pcmpeqb xmmE,xmmE ; xmmE=(all 1's) + cmp ecx, byte SIZEOF_XMMWORD/2 ; at least 8 pixels (two xmmwords) left? + jb short .column_st16 + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 ; at least 4 pixels (one xmmword) left? + jb short .column_st15 + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + cmp ecx, byte SIZEOF_XMMWORD/16 ; no pixel left (ecx < 1)? + jb short .nextrow + mov eax,ecx ; eax = pixels left (1..3) + xor ecx, byte 0x03 + inc ecx + shl ecx, 4 ; ecx = (4 - pixels) * 16 = shift count in bits + movd xmmF,ecx + psrlq xmmE,xmmF + punpcklbw xmmE,xmmE ; xmmE = store mask: 0xFF in the first 4*eax bytes + ; ---------------- + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0 ; outptr is already 16-byte aligned + lea eax, [ecx+eax*4] ; RGB_PIXELSIZE + cmp eax, byte SIZEOF_XMMWORD
+ ja short .adj0 ; pixels run past the next 16-byte boundary: store from edi as is + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx + movdqa xmmB,xmmA + movdqa xmmG,xmmE + pslldq xmmA, SIZEOF_XMMWORD/2 + pslldq xmmE, SIZEOF_XMMWORD/2 + movd xmmC,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1 ; shift is under 8 bytes + movd xmmH,ecx + psllq xmmA,xmmH + psllq xmmE,xmmH + jmp short .adj0 +.adj1: neg ecx ; OR the per-qword left shift with the bits carried from the low into the high qword + movd xmmH,ecx + psrlq xmmA,xmmH + psrlq xmmE,xmmH + psllq xmmB,xmmC + psllq xmmG,xmmC + por xmmA,xmmB + por xmmE,xmmG +.adj0: ; ---------------- + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16,7 + ; restore num_cols and the row-array pointers saved at .rowloop, then step each pointer to its next row +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + ; movntdq/maskmovdqu are weakly-ordered non-temporal stores, hence the fence before returning + sfence ; flush the write buffer + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JDCOLOR_YCCRGB_SSE2_SUPPORTED +%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 diff --git a/jdct.h b/jdct.h index 04192a2..678a3d1 100644 --- a/jdct.h +++ b/jdct.h @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : January 5, 2006 + * --------------------------------------------------------------------- + * * This include file contains common declarations for the forward and * inverse DCT modules.
These declarations are private to the DCT managers * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms. @@ -13,6 +20,13 @@ */ +/* SIMD Ext: configuration check */ + +#if BITS_IN_JSAMPLE != 8 +#error "Sorry, this SIMD code only copes with 8-bit sample values." +#endif + + /* * A forward DCT routine is given a pointer to a work area of type DCTELEM[]; * the DCT is to be performed in-place in that buffer. Type DCTELEM is int @@ -26,14 +40,25 @@ * Quantization of the output coefficients is done by jcdctmgr.c. */ -#if BITS_IN_JSAMPLE == 8 -typedef int DCTELEM; /* 16 or 32 bits is fine */ -#else -typedef INT32 DCTELEM; /* must have 32 bits */ -#endif +/* SIMD Ext: To maximize parallelism, Type DCTELEM is changed to short + * (originally, int). + */ +typedef short DCTELEM; /* SIMD Ext: must be short */ typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data)); typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); +typedef JMETHOD(void, convsamp_int_method_ptr, + (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM * workspace)); +typedef JMETHOD(void, convsamp_float_method_ptr, + (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace)); +typedef JMETHOD(void, quantize_int_method_ptr, + (JCOEFPTR coef_block, DCTELEM * divisors, + DCTELEM * workspace)); +typedef JMETHOD(void, quantize_float_method_ptr, + (JCOEFPTR coef_block, FAST_FLOAT * divisors, + FAST_FLOAT * workspace)); /* @@ -49,19 +74,22 @@ typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); /* typedef inverse_DCT_method_ptr is declared in jpegint.h */ +/* SIMD Ext: To maximize parallelism, Type MULTIPLIER is changed to short. + * Macro definitions of MULTIPLIER and FAST_FLOAT in jmorecfg.h are ignored. + */ +#undef MULTIPLIER +#define MULTIPLIER short /* SIMD Ext: must be short */ +#undef FAST_FLOAT +#define FAST_FLOAT float /* SIMD Ext: must be float */ + /* * Each IDCT routine has its own ideas about the best dct_table element type. 
*/ -typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */ -#if BITS_IN_JSAMPLE == 8 -typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */ +typedef MULTIPLIER ISLOW_MULT_TYPE; /* SIMD Ext: must be short */ +typedef MULTIPLIER IFAST_MULT_TYPE; /* SIMD Ext: must be short */ #define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ -#else -typedef INT32 IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ -#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ -#endif -typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ +typedef FAST_FLOAT FLOAT_MULT_TYPE; /* SIMD Ext: must be float */ /* @@ -81,15 +109,64 @@ typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ /* Short forms of external names for systems with brain-damaged linkers. */ #ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_fdct_islow jFDislow -#define jpeg_fdct_ifast jFDifast -#define jpeg_fdct_float jFDfloat -#define jpeg_idct_islow jRDislow -#define jpeg_idct_ifast jRDifast -#define jpeg_idct_float jRDfloat -#define jpeg_idct_4x4 jRD4x4 -#define jpeg_idct_2x2 jRD2x2 -#define jpeg_idct_1x1 jRD1x1 +#define jpeg_fdct_islow jFDislow /* jfdctint.asm */ +#define jpeg_fdct_ifast jFDifast /* jfdctfst.asm */ +#define jpeg_fdct_float jFDfloat /* jfdctflt.asm */ +#define jpeg_fdct_islow_mmx jFDMislow /* jfmmxint.asm */ +#define jpeg_fdct_ifast_mmx jFDMifast /* jfmmxfst.asm */ +#define jpeg_fdct_float_3dnow jFD3float /* jf3dnflt.asm */ +#define jpeg_fdct_islow_sse2 jFDSislow /* jfss2int.asm */ +#define jpeg_fdct_ifast_sse2 jFDSifast /* jfss2fst.asm */ +#define jpeg_fdct_float_sse jFDSfloat /* jfsseflt.asm */ +#define jpeg_convsamp_int jCnvInt /* jcqntint.asm */ +#define jpeg_quantize_int jQntInt /* jcqntint.asm */ +#define jpeg_quantize_idiv jQntIDiv /* jcqntint.asm */ +#define jpeg_convsamp_float jCnvFloat /* jcqntflt.asm */ +#define jpeg_quantize_float jQntFloat /* jcqntflt.asm */ +#define jpeg_convsamp_int_mmx jCnvMmx /* 
jcqntmmx.asm */ +#define jpeg_quantize_int_mmx jQntMmx /* jcqntmmx.asm */ +#define jpeg_convsamp_flt_3dnow jCnv3dnow /* jcqnt3dn.asm */ +#define jpeg_quantize_flt_3dnow jQnt3dnow /* jcqnt3dn.asm */ +#define jpeg_convsamp_int_sse2 jCnvISse2 /* jcqnts2i.asm */ +#define jpeg_quantize_int_sse2 jQntISse2 /* jcqnts2i.asm */ +#define jpeg_convsamp_flt_sse jCnvSse /* jcqntsse.asm */ +#define jpeg_quantize_flt_sse jQntSse /* jcqntsse.asm */ +#define jpeg_convsamp_flt_sse2 jCnvFSse2 /* jcqnts2f.asm */ +#define jpeg_quantize_flt_sse2 jQntFSse2 /* jcqnts2f.asm */ +#define jpeg_idct_islow jRDislow /* jidctint.asm */ +#define jpeg_idct_ifast jRDifast /* jidctfst.asm */ +#define jpeg_idct_float jRDfloat /* jidctflt.asm */ +#define jpeg_idct_4x4 jRD4x4 /* jidctred.asm */ +#define jpeg_idct_2x2 jRD2x2 /* jidctred.asm */ +#define jpeg_idct_1x1 jRD1x1 /* jidctred.asm */ +#define jpeg_idct_islow_mmx jRDMislow /* jimmxint.asm */ +#define jpeg_idct_ifast_mmx jRDMifast /* jimmxfst.asm */ +#define jpeg_idct_float_3dnow jRD3float /* ji3dnflt.asm */ +#define jpeg_idct_4x4_mmx jRDM4x4 /* jimmxred.asm */ +#define jpeg_idct_2x2_mmx jRDM2x2 /* jimmxred.asm */ +#define jpeg_idct_islow_sse2 jRDSislow /* jiss2int.asm */ +#define jpeg_idct_ifast_sse2 jRDSifast /* jiss2fst.asm */ +#define jpeg_idct_float_sse jRDSfloat /* jisseflt.asm */ +#define jpeg_idct_float_sse2 jRD2float /* jiss2flt.asm */ +#define jpeg_idct_4x4_sse2 jRDS4x4 /* jiss2red.asm */ +#define jpeg_idct_2x2_sse2 jRDS2x2 /* jiss2red.asm */ +#define jconst_fdct_float jFCfloat /* jfdctflt.asm */ +#define jconst_fdct_islow_mmx jFCMislow /* jfmmxint.asm */ +#define jconst_fdct_ifast_mmx jFCMifast /* jfmmxfst.asm */ +#define jconst_fdct_float_3dnow jFC3float /* jf3dnflt.asm */ +#define jconst_fdct_islow_sse2 jFCSislow /* jfss2int.asm */ +#define jconst_fdct_ifast_sse2 jFCSifast /* jfss2fst.asm */ +#define jconst_fdct_float_sse jFCSfloat /* jfsseflt.asm */ +#define jconst_idct_float jRCfloat /* jidctflt.asm */ +#define jconst_idct_islow_mmx 
jRCMislow /* jimmxint.asm */ +#define jconst_idct_ifast_mmx jRCMifast /* jimmxfst.asm */ +#define jconst_idct_float_3dnow jRC3float /* ji3dnflt.asm */ +#define jconst_idct_red_mmx jRCMred /* jimmxred.asm */ +#define jconst_idct_islow_sse2 jRCSislow /* jiss2int.asm */ +#define jconst_idct_ifast_sse2 jRCSifast /* jiss2fst.asm */ +#define jconst_idct_float_sse jRCSfloat /* jisseflt.asm */ +#define jconst_idct_float_sse2 jRC2float /* jiss2flt.asm */ +#define jconst_idct_red_sse2 jRCSred /* jiss2red.asm */ #endif /* NEED_SHORT_EXTERNAL_NAMES */ /* Extern declarations for the forward and inverse DCT routines. */ @@ -98,6 +175,47 @@ EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data)); EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data)); EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data)); +EXTERN(void) jpeg_fdct_islow_mmx JPP((DCTELEM * data)); +EXTERN(void) jpeg_fdct_ifast_mmx JPP((DCTELEM * data)); +EXTERN(void) jpeg_fdct_float_3dnow JPP((FAST_FLOAT * data)); + +EXTERN(void) jpeg_fdct_islow_sse2 JPP((DCTELEM * data)); +EXTERN(void) jpeg_fdct_ifast_sse2 JPP((DCTELEM * data)); +EXTERN(void) jpeg_fdct_float_sse JPP((FAST_FLOAT * data)); + +EXTERN(void) jpeg_convsamp_int + JPP((JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)); +EXTERN(void) jpeg_quantize_int + JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)); +EXTERN(void) jpeg_quantize_idiv + JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)); +EXTERN(void) jpeg_convsamp_float + JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace)); +EXTERN(void) jpeg_quantize_float + JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)); + +EXTERN(void) jpeg_convsamp_int_mmx + JPP((JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)); +EXTERN(void) jpeg_quantize_int_mmx + JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)); +EXTERN(void) jpeg_convsamp_flt_3dnow + JPP((JSAMPARRAY sample_data, JDIMENSION start_col, 
FAST_FLOAT *workspace)); +EXTERN(void) jpeg_quantize_flt_3dnow + JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)); + +EXTERN(void) jpeg_convsamp_int_sse2 + JPP((JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)); +EXTERN(void) jpeg_quantize_int_sse2 + JPP((JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)); +EXTERN(void) jpeg_convsamp_flt_sse + JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace)); +EXTERN(void) jpeg_quantize_flt_sse + JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)); +EXTERN(void) jpeg_convsamp_flt_sse2 + JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace)); +EXTERN(void) jpeg_quantize_flt_sse2 + JPP((JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)); + EXTERN(void) jpeg_idct_islow JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); @@ -117,6 +235,60 @@ EXTERN(void) jpeg_idct_1x1 JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_islow_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_ifast_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_4x4_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_2x2_mmx + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + +EXTERN(void) jpeg_idct_float_3dnow + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); 
+EXTERN(void) jpeg_idct_float_sse + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_float_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + +EXTERN(void) jpeg_idct_islow_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_ifast_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_4x4_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_2x2_sse2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + +extern const int jconst_fdct_float[]; +extern const int jconst_fdct_islow_mmx[]; +extern const int jconst_fdct_ifast_mmx[]; +extern const int jconst_fdct_float_3dnow[]; +extern const int jconst_fdct_islow_sse2[]; +extern const int jconst_fdct_ifast_sse2[]; +extern const int jconst_fdct_float_sse[]; +extern const int jconst_idct_float[]; +extern const int jconst_idct_islow_mmx[]; +extern const int jconst_idct_ifast_mmx[]; +extern const int jconst_idct_float_3dnow[]; +extern const int jconst_idct_red_mmx[]; +extern const int jconst_idct_islow_sse2[]; +extern const int jconst_idct_ifast_sse2[]; +extern const int jconst_idct_float_sse[]; +extern const int jconst_idct_float_sse2[]; +extern const int jconst_idct_red_sse2[]; + /* * Macros for handling fixed-point arithmetic; these are used by many diff --git a/jdct.inc b/jdct.inc new file mode 100644 index 0000000..a6fb0ed --- /dev/null +++ b/jdct.inc @@ -0,0 +1,125 @@ +; +; jdct.inc - private declarations for forward & reverse DCT 
subsystems +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; Last Modified : January 5, 2006 +; +; [TAB8] + +; ---- jdct.h -------------------------------------------------------------- +; +; configuration check: BITS_IN_JSAMPLE==8 (8-bit sample values) is the only +; valid setting on this SIMD extension. +; +%if BITS_IN_JSAMPLE != 8 +%error "Sorry, this SIMD code only copes with 8-bit sample values." +%endif + +; A forward DCT routine is given a pointer to a work area of type DCTELEM[]; +; the DCT is to be performed in-place in that buffer. +; To maximize parallelism, Type DCTELEM is changed to short (originally, int). +; +%define DCTELEM word ; short +%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM) + +; To maximize parallelism, Type MULTIPLIER is changed to short. +; +%define MULTIPLIER word ; short +%define SIZEOF_MULTIPLIER SIZEOF_WORD ; sizeof(MULTIPLIER) +%define FAST_FLOAT FP32 ; float +%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(FAST_FLOAT) + +; Each IDCT routine has its own ideas about the best dct_table element type. +; +%define ISLOW_MULT_TYPE MULTIPLIER ; must be short +%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_MULTIPLIER ; sizeof(ISLOW_MULT_TYPE) +%define IFAST_MULT_TYPE MULTIPLIER ; must be short +%define SIZEOF_IFAST_MULT_TYPE SIZEOF_MULTIPLIER ; sizeof(IFAST_MULT_TYPE) +%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors +%define FLOAT_MULT_TYPE FAST_FLOAT ; must be float +%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FAST_FLOAT ; sizeof(FLOAT_MULT_TYPE) + +; Each IDCT routine is responsible for range-limiting its results and +; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could +; be quite far out of range if the input data is corrupt, so a bulletproof +; range-limiting step is required. We use a mask-and-table-lookup method +; to do the combined operations quickly. 
+; +%define RANGE_MASK (MAXJSAMPLE * 4 + 3) ; 2 bits wider than legal samples + +; Short forms of external names for systems with brain-damaged linkers. +; +%ifdef NEED_SHORT_EXTERNAL_NAMES +%define jpeg_fdct_islow jFDislow ; jfdctint.asm +%define jpeg_fdct_ifast jFDifast ; jfdctfst.asm +%define jpeg_fdct_float jFDfloat ; jfdctflt.asm +%define jpeg_fdct_islow_mmx jFDMislow ; jfmmxint.asm +%define jpeg_fdct_ifast_mmx jFDMifast ; jfmmxfst.asm +%define jpeg_fdct_float_3dnow jFD3float ; jf3dnflt.asm +%define jpeg_fdct_islow_sse2 jFDSislow ; jfss2int.asm +%define jpeg_fdct_ifast_sse2 jFDSifast ; jfss2fst.asm +%define jpeg_fdct_float_sse jFDSfloat ; jfsseflt.asm +%define jpeg_convsamp_int jCnvInt ; jcqntint.asm +%define jpeg_quantize_int jQntInt ; jcqntint.asm +%define jpeg_quantize_idiv jQntIDiv ; jcqntint.asm +%define jpeg_convsamp_float jCnvFloat ; jcqntflt.asm +%define jpeg_quantize_float jQntFloat ; jcqntflt.asm +%define jpeg_convsamp_int_mmx jCnvMmx ; jcqntmmx.asm +%define jpeg_quantize_int_mmx jQntMmx ; jcqntmmx.asm +%define jpeg_convsamp_flt_3dnow jCnv3dnow ; jcqnt3dn.asm +%define jpeg_quantize_flt_3dnow jQnt3dnow ; jcqnt3dn.asm +%define jpeg_convsamp_int_sse2 jCnvISse2 ; jcqnts2i.asm +%define jpeg_quantize_int_sse2 jQntISse2 ; jcqnts2i.asm +%define jpeg_convsamp_flt_sse jCnvSse ; jcqntsse.asm +%define jpeg_quantize_flt_sse jQntSse ; jcqntsse.asm +%define jpeg_convsamp_flt_sse2 jCnvFSse2 ; jcqnts2f.asm +%define jpeg_quantize_flt_sse2 jQntFSse2 ; jcqnts2f.asm +%define jpeg_idct_islow jRDislow ; jidctint.asm +%define jpeg_idct_ifast jRDifast ; jidctfst.asm +%define jpeg_idct_float jRDfloat ; jidctflt.asm +%define jpeg_idct_4x4 jRD4x4 ; jidctred.asm +%define jpeg_idct_2x2 jRD2x2 ; jidctred.asm +%define jpeg_idct_1x1 jRD1x1 ; jidctred.asm +%define jpeg_idct_islow_mmx jRDMislow ; jimmxint.asm +%define jpeg_idct_ifast_mmx jRDMifast ; jimmxfst.asm +%define jpeg_idct_float_3dnow jRD3float ; ji3dnflt.asm +%define jpeg_idct_4x4_mmx jRDM4x4 ; jimmxred.asm +%define 
jpeg_idct_2x2_mmx jRDM2x2 ; jimmxred.asm +%define jpeg_idct_islow_sse2 jRDSislow ; jiss2int.asm +%define jpeg_idct_ifast_sse2 jRDSifast ; jiss2fst.asm +%define jpeg_idct_float_sse jRDSfloat ; jisseflt.asm +%define jpeg_idct_float_sse2 jRD2float ; jiss2flt.asm +%define jpeg_idct_4x4_sse2 jRDS4x4 ; jiss2red.asm +%define jpeg_idct_2x2_sse2 jRDS2x2 ; jiss2red.asm +%define jconst_fdct_float jFCfloat ; jfdctflt.asm +%define jconst_fdct_islow_mmx jFCMislow ; jfmmxint.asm +%define jconst_fdct_ifast_mmx jFCMifast ; jfmmxfst.asm +%define jconst_fdct_float_3dnow jFC3float ; jf3dnflt.asm +%define jconst_fdct_islow_sse2 jFCSislow ; jfss2int.asm +%define jconst_fdct_ifast_sse2 jFCSifast ; jfss2fst.asm +%define jconst_fdct_float_sse jFCSfloat ; jfsseflt.asm +%define jconst_idct_float jRCfloat ; jidctflt.asm +%define jconst_idct_islow_mmx jRCMislow ; jimmxint.asm +%define jconst_idct_ifast_mmx jRCMifast ; jimmxfst.asm +%define jconst_idct_float_3dnow jRC3float ; ji3dnflt.asm +%define jconst_idct_red_mmx jRCMred ; jimmxred.asm +%define jconst_idct_islow_sse2 jRCSislow ; jiss2int.asm +%define jconst_idct_ifast_sse2 jRCSifast ; jiss2fst.asm +%define jconst_idct_float_sse jRCSfloat ; jisseflt.asm +%define jconst_idct_float_sse2 jRC2float ; jiss2flt.asm +%define jconst_idct_red_sse2 jRCSred ; jiss2red.asm +%endif ; NEED_SHORT_EXTERNAL_NAMES + +; -------------------------------------------------------------------------- + +%define ROW(n,b,s) ((b)+(n)*(s)) +%define COL(n,b,s) ((b)+(n)*(s)*DCTSIZE) + +%define DWBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD) +%define MMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD) +%define XMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD) + +; -------------------------------------------------------------------------- diff --git a/jddctmgr.c b/jddctmgr.c index bbf8d0e..de6df8d 100644 --- a/jddctmgr.c +++ b/jddctmgr.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. 
* For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : December 24, 2005 + * --------------------------------------------------------------------- + * * This file contains the inverse-DCT management logic. * This code selects a particular IDCT implementation to be used, * and it performs related housekeeping chores. No code in this file @@ -94,6 +101,7 @@ start_pass (j_decompress_ptr cinfo) int method = 0; inverse_DCT_method_ptr method_ptr = NULL; JQUANT_TBL * qtbl; + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -105,34 +113,95 @@ start_pass (j_decompress_ptr cinfo) method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; case 2: - method_ptr = jpeg_idct_2x2; +#ifdef JIDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_red_sse2)) + method_ptr = jpeg_idct_2x2_sse2; + else +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + method_ptr = jpeg_idct_2x2_mmx; + else +#endif + method_ptr = jpeg_idct_2x2; method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; case 4: - method_ptr = jpeg_idct_4x4; +#ifdef JIDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_red_sse2)) + method_ptr = jpeg_idct_4x4_sse2; + else +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + method_ptr = jpeg_idct_4x4_mmx; + else +#endif + method_ptr = jpeg_idct_4x4; method = JDCT_ISLOW; /* jidctred uses islow-style table */ break; -#endif +#endif /* IDCT_SCALING_SUPPORTED */ case DCTSIZE: switch (cinfo->dct_method) { #ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: - method_ptr = jpeg_idct_islow; +#ifdef JIDCT_INT_SSE2_SUPPORTED + if 
(simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_islow_sse2)) + method_ptr = jpeg_idct_islow_sse2; + else +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + method_ptr = jpeg_idct_islow_mmx; + else +#endif + method_ptr = jpeg_idct_islow; method = JDCT_ISLOW; break; -#endif +#endif /* DCT_ISLOW_SUPPORTED */ #ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: - method_ptr = jpeg_idct_ifast; +#ifdef JIDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_ifast_sse2)) + method_ptr = jpeg_idct_ifast_sse2; + else +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + method_ptr = jpeg_idct_ifast_mmx; + else +#endif + method_ptr = jpeg_idct_ifast; method = JDCT_IFAST; break; -#endif +#endif /* DCT_IFAST_SUPPORTED */ #ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: - method_ptr = jpeg_idct_float; +#ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED + if (simd & JSIMD_SSE && simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_float_sse2)) + method_ptr = jpeg_idct_float_sse2; + else +#endif +#ifdef JIDCT_FLT_SSE_MMX_SUPPORTED + if (simd & JSIMD_SSE && + IS_CONST_ALIGNED_16(jconst_idct_float_sse)) + method_ptr = jpeg_idct_float_sse; + else +#endif +#ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED + if (simd & JSIMD_3DNOW) + method_ptr = jpeg_idct_float_3dnow; + else +#endif + method_ptr = jpeg_idct_float; method = JDCT_FLOAT; break; -#endif +#endif /* DCT_FLOAT_SUPPORTED */ default: ERREXIT(cinfo, JERR_NOT_COMPILED); break; @@ -267,3 +336,78 @@ jinit_inverse_dct (j_decompress_ptr cinfo) idct->cur_method[ci] = -1; } } + + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +GLOBAL(unsigned int) +jpeg_simd_inverse_dct (j_decompress_ptr cinfo, int method) +{ + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); + + switch (method) { +#ifdef DCT_ISLOW_SUPPORTED + case JDCT_ISLOW: +#ifdef JIDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_islow_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + if (simd & 
JSIMD_MMX) + return JSIMD_MMX; +#endif + return JSIMD_NONE; +#endif /* DCT_ISLOW_SUPPORTED */ +#ifdef DCT_IFAST_SUPPORTED + case JDCT_IFAST: +#ifdef JIDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_ifast_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif + return JSIMD_NONE; +#endif /* DCT_IFAST_SUPPORTED */ +#ifdef DCT_FLOAT_SUPPORTED + case JDCT_FLOAT: +#ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED + if (simd & JSIMD_SSE && simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_float_sse2)) + return JSIMD_SSE; /* (JSIMD_SSE | JSIMD_SSE2); */ +#endif +#ifdef JIDCT_FLT_SSE_MMX_SUPPORTED + if (simd & JSIMD_SSE && + IS_CONST_ALIGNED_16(jconst_idct_float_sse)) + return JSIMD_SSE; /* (JSIMD_SSE | JSIMD_MMX); */ +#endif +#ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED + if (simd & JSIMD_3DNOW) + return JSIMD_3DNOW; /* (JSIMD_3DNOW | JSIMD_MMX); */ +#endif + return JSIMD_NONE; +#endif /* DCT_FLOAT_SUPPORTED */ +#ifdef IDCT_SCALING_SUPPORTED + case JDCT_FLOAT + 1: +#ifdef JIDCT_INT_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_idct_red_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif + return JSIMD_NONE; +#endif /* IDCT_SCALING_SUPPORTED */ + default: + ; + } + + return JSIMD_NONE; /* not compiled */ +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ diff --git a/jdhuff.c b/jdhuff.c index b5ba39f..4f75ebe 100644 --- a/jdhuff.c +++ b/jdhuff.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified to improve performance. 
+ * Last Modified : October 31, 2004 + * --------------------------------------------------------------------- + * * This file contains Huffman entropy decoding routines. * * Much of the complexity here has to do with supporting input suspension. @@ -151,8 +158,8 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, { JHUFF_TBL *htbl; d_derived_tbl *dtbl; - int p, i, l, si, numsymbols; - int lookbits, ctr; + int p, i, l, la, lx, si, numsymbols; + int lookbits, look_end, sym, val, ctr; char huffsize[257]; unsigned int huffcode[257]; unsigned int code; @@ -234,18 +241,34 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, * with that code. */ - MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits)); + MEMZERO(dtbl->lookx_nbits, SIZEOF(dtbl->lookx_nbits)); p = 0; - for (l = 1; l <= HUFF_LOOKAHEAD; l++) { + for (l = 1; l <= HUFFX_LOOKAHEAD-1; l++) { for (i = 1; i <= (int) htbl->bits[l]; i++, p++) { /* l = current code's length, p = its index in huffcode[] & huffval[]. 
*/ /* Generate left-justified code followed by all possible bit sequences */ - lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l); - for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) { - dtbl->look_nbits[lookbits] = l; - dtbl->look_sym[lookbits] = htbl->huffval[p]; - lookbits++; + sym = htbl->huffval[p]; /* current symbol */ + la = sym & 15; /* length of additional bits field */ + lx = HUFFX_LOOKAHEAD - l; + lookbits = huffcode[p] << lx; + look_end = lookbits + (1 << lx); + lx -= la; + while (lookbits < look_end) { + if (lx >= 0) { + val = (lookbits >> lx) & ((1 << la) - 1); + ctr = 1 << lx; + } else { + val = (lookbits << -lx) & ((1 << la) - 1); + ctr = 1; + } + val = HUFF_EXTEND(val, la); + for (; ctr > 0; ctr--) { + dtbl->lookx_nbits[lookbits] = l + la; + dtbl->lookx_val[lookbits] = val; + dtbl->lookx_sym[lookbits] = sym; + lookbits++; + } } } } @@ -271,23 +294,8 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, * See jdhuff.h for info about usage. * Note: current values of get_buffer and bits_left are passed as parameters, * but are returned in the corresponding fields of the state struct. - * - * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width - * of get_buffer to be used. (On machines with wider words, an even larger - * buffer could be used.) However, on some machines 32-bit shifts are - * quite slow and take time proportional to the number of places shifted. - * (This is true with most PC compilers, for instance.) In this case it may - * be a win to set MIN_GET_BITS to the minimum value of 15. This reduces the - * average shift distance at the cost of more calls to jpeg_fill_bit_buffer. 
*/ -#ifdef SLOW_SHIFT_32 -#define MIN_GET_BITS 15 /* minimum allowable value */ -#else -#define MIN_GET_BITS (BIT_BUF_SIZE-7) -#endif - - GLOBAL(boolean) jpeg_fill_bit_buffer (bitread_working_state * state, register bit_buf_type get_buffer, register int bits_left, @@ -433,32 +441,6 @@ jpeg_huff_decode (bitread_working_state * state, } -/* - * Figure F.12: extend sign bit. - * On some machines, a shift and add will be faster than a table lookup. - */ - -#ifdef AVOID_TABLES - -#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) - -#else - -#define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) - -static const int extend_test[16] = /* entry n is 2**(n-1) */ - { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, - 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; - -static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ - { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, - ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, - ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, - ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; - -#endif /* AVOID_TABLES */ - - /* * Check for a restart marker & resynchronize decoder. * Returns FALSE if must suspend. 
@@ -548,13 +530,59 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Decode a single block's worth of coefficients */ /* Section F.2.2.1: decode the DC coefficient difference */ - HUFF_DECODE(s, br_state, dctbl, return FALSE, label1); - if (s) { - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); + { /* HUFFX_DECODE */ + register int nb, look, t; + if (bits_left < HUFFX_LOOKAHEAD) { + register const JOCTET * next_input_byte = br_state.next_input_byte; + register size_t bytes_in_buffer = br_state.bytes_in_buffer; + if (cinfo->unread_marker == 0) { + while (bits_left < MIN_GET_BITS) { + register int c; + if (bytes_in_buffer == 0 || + (c = GETJOCTET(*next_input_byte)) == 0xFF) { + goto label11; } + bytes_in_buffer--; next_input_byte++; + get_buffer = (get_buffer << 8) | c; + bits_left += 8; + } + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + } else { + label11: + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + if (! 
jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) { + return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + if (bits_left < HUFFX_LOOKAHEAD) { + nb = 1; goto label1; + } + } + } + look = PEEK_BITS(HUFFX_LOOKAHEAD); + if ((nb = dctbl->lookx_nbits[look]) != 0) { + s = dctbl->lookx_val[look]; + if (nb <= HUFFX_LOOKAHEAD) { + DROP_BITS(nb); + } else { + DROP_BITS(HUFFX_LOOKAHEAD); + nb -= HUFFX_LOOKAHEAD; + CHECK_BIT_BUFFER(br_state, nb, return FALSE); + s += GET_BITS(nb); + } + } else { + nb = HUFFX_LOOKAHEAD; + label1: + if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,dctbl,nb)) + < 0) { return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + t = GET_BITS(s); + s = HUFF_EXTEND(t, s); + } + } } - if (entropy->dc_needed[blkn]) { /* Convert DC difference to actual value, update last_dc_val */ int ci = cinfo->MCU_membership[blkn]; @@ -569,16 +597,65 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Section F.2.2.2: decode the AC coefficients */ /* Since zeroes are skipped, output area must be cleared beforehand */ for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE(s, br_state, actbl, return FALSE, label2); - - r = s >> 4; - s &= 15; - + { /* HUFFX_DECODE */ + register int nb, look, t; + if (bits_left < HUFFX_LOOKAHEAD) { + register const JOCTET * next_input_byte + = br_state.next_input_byte; + register size_t bytes_in_buffer = br_state.bytes_in_buffer; + if (cinfo->unread_marker == 0) { + while (bits_left < MIN_GET_BITS) { + register int c; + if (bytes_in_buffer == 0 || + (c = GETJOCTET(*next_input_byte)) == 0xFF) { + goto label21; } + bytes_in_buffer--; next_input_byte++; + get_buffer = (get_buffer << 8) | c; + bits_left += 8; + } + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + } else { + label21: + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = 
bytes_in_buffer; + if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left,0)) { + return FALSE; } + get_buffer = br_state.get_buffer; + bits_left = br_state.bits_left; + if (bits_left < HUFFX_LOOKAHEAD) { + nb = 1; goto label2; + } + } + } + look = PEEK_BITS(HUFFX_LOOKAHEAD); + if ((nb = actbl->lookx_nbits[look]) != 0) { + s = actbl->lookx_val[look]; + r = actbl->lookx_sym[look] >> 4; + if (nb <= HUFFX_LOOKAHEAD) { + DROP_BITS(nb); + } else { + DROP_BITS(HUFFX_LOOKAHEAD); + nb -= HUFFX_LOOKAHEAD; + CHECK_BIT_BUFFER(br_state, nb, return FALSE); + s += GET_BITS(nb); + } + } else { + nb = HUFFX_LOOKAHEAD; + label2: + if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,actbl,nb)) + < 0) { return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + r = s >> 4; s &= 15; + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + t = GET_BITS(s); + s = HUFF_EXTEND(t, s); + } + } + } if (s) { k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); /* Output coefficient in natural (dezigzagged) order. * Note: the extra entries in jpeg_natural_order[] will save us * if k >= DCTSIZE2, which could happen if the data is corrupted. 
@@ -596,15 +673,64 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Section F.2.2.2: decode the AC coefficients */ /* In this path we just discard the values */ for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE(s, br_state, actbl, return FALSE, label3); - - r = s >> 4; - s &= 15; - + { /* HUFFX_DECODE */ + register int nb, look; + if (bits_left < HUFFX_LOOKAHEAD) { + register const JOCTET * next_input_byte + = br_state.next_input_byte; + register size_t bytes_in_buffer = br_state.bytes_in_buffer; + if (cinfo->unread_marker == 0) { + while (bits_left < MIN_GET_BITS) { + register int c; + if (bytes_in_buffer == 0 || + (c = GETJOCTET(*next_input_byte)) == 0xFF) { + goto label31; } + bytes_in_buffer--; next_input_byte++; + get_buffer = (get_buffer << 8) | c; + bits_left += 8; + } + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + } else { + label31: + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + if (! 
jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left,0)) { + return FALSE; } + get_buffer = br_state.get_buffer; + bits_left = br_state.bits_left; + if (bits_left < HUFFX_LOOKAHEAD) { + nb = 1; goto label3; + } + } + } + look = PEEK_BITS(HUFFX_LOOKAHEAD); + if ((nb = actbl->lookx_nbits[look]) != 0) { + s = actbl->lookx_sym[look]; + r = s >> 4; s &= 15; + if (nb <= HUFFX_LOOKAHEAD) { + DROP_BITS(nb); + } else { + DROP_BITS(HUFFX_LOOKAHEAD); + nb -= HUFFX_LOOKAHEAD; + CHECK_BIT_BUFFER(br_state, nb, return FALSE); + DROP_BITS(nb); + } + } else { + nb = HUFFX_LOOKAHEAD; + label3: + if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,actbl,nb)) + < 0) { return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + r = s >> 4; s &= 15; + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + DROP_BITS(s); + } + } + } if (s) { k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - DROP_BITS(s); } else { if (r != 15) break; diff --git a/jdhuff.h b/jdhuff.h index ae19b6c..b5e193e 100644 --- a/jdhuff.h +++ b/jdhuff.h @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified to improve performance. + * Last Modified : October 31, 2004 + * --------------------------------------------------------------------- + * * This file contains declarations for Huffman entropy decoding routines * that are shared between the sequential decoder (jdhuff.c) and the * progressive decoder (jdphuff.c). No other modules need to see these. 
@@ -21,7 +28,7 @@ /* Derived data constructed for each Huffman table */ -#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */ +#define HUFFX_LOOKAHEAD 9 /* # of bits of lookahead */ typedef struct { /* Basic tables: (element [0] of each array is unused) */ @@ -36,13 +43,15 @@ typedef struct { /* Link to public Huffman table (needed only in jpeg_huff_decode) */ JHUFF_TBL *pub; - /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of + /* Lookahead tables: indexed by the next HUFFX_LOOKAHEAD bits of * the input data stream. If the next Huffman code is no more - * than HUFF_LOOKAHEAD bits long, we can obtain its length and - * the corresponding symbol directly from these tables. + * than HUFFX_LOOKAHEAD-1 bits long, we can obtain its length, + * the corresponding symbol, and the encoded coefficient value + * directly from these tables. */ - int look_nbits[1<src->next_input_byte; \ br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ get_buffer = permstate.get_buffer; \ - bits_left = permstate.bits_left; + bits_left = permstate.bits_left #define BITREAD_SAVE_STATE(cinfop,permstate) \ cinfop->src->next_input_byte = br_state.next_input_byte; \ @@ -155,47 +179,14 @@ EXTERN(boolean) jpeg_fill_bit_buffer JPP((bitread_working_state * state, register bit_buf_type get_buffer, register int bits_left, int nbits)); - -/* - * Code for extracting next Huffman-coded symbol from input bit stream. - * Again, this is time-critical and we make the main paths be macros. - * - * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits - * without looping. Usually, more than 95% of the Huffman codes will be 8 - * or fewer bits long. The few overlength codes are handled with a loop, - * which need not be inline code. - * - * Notes about the HUFF_DECODE macro: - * 1. Near the end of the data segment, we may fail to get enough bits - * for a lookahead. In that case, we do it the hard way. - * 2. 
If the lookahead table contains no entry, the next code must be - * more than HUFF_LOOKAHEAD bits long. - * 3. jpeg_huff_decode returns -1 if forced to suspend. - */ - -#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \ -{ register int nb, look; \ - if (bits_left < HUFF_LOOKAHEAD) { \ - if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \ - get_buffer = state.get_buffer; bits_left = state.bits_left; \ - if (bits_left < HUFF_LOOKAHEAD) { \ - nb = 1; goto slowlabel; \ - } \ - } \ - look = PEEK_BITS(HUFF_LOOKAHEAD); \ - if ((nb = htbl->look_nbits[look]) != 0) { \ - DROP_BITS(nb); \ - result = htbl->look_sym[look]; \ - } else { \ - nb = HUFF_LOOKAHEAD+1; \ -slowlabel: \ - if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \ - { failaction; } \ - get_buffer = state.get_buffer; bits_left = state.bits_left; \ - } \ -} - /* Out-of-line case for Huffman code fetching */ EXTERN(int) jpeg_huff_decode JPP((bitread_working_state * state, register bit_buf_type get_buffer, register int bits_left, d_derived_tbl * htbl, int min_bits)); + + +/* + * Figure F.12: extend sign bit. + */ + +#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) diff --git a/jdmerge.c b/jdmerge.c index 3744446..f440d40 100644 --- a/jdmerge.c +++ b/jdmerge.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : January 5, 2006 + * --------------------------------------------------------------------- + * * This file contains code for merged upsampling/color conversion. 
* * This file combines functions from jdsample.c and jdcolor.c; @@ -35,6 +42,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jcolsamp.h" /* Private declarations */ #ifdef UPSAMPLE_MERGING_SUPPORTED @@ -218,6 +226,17 @@ merged_1v_upsample (j_decompress_ptr cinfo, */ +#if RGB_PIXELSIZE == 4 +/* offset of filler byte */ +#define RGB_FILLER (6 - (RGB_RED) - (RGB_GREEN) - (RGB_BLUE)) +/* byte pattern to fill with */ +#ifdef RGBX_FILLER_0XFF +#define RGB_FILLER_BYTE 0xFF +#else +#define RGB_FILLER_BYTE 0x00 +#endif +#endif /* RGB_PIXELSIZE == 4 */ + /* * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. */ @@ -258,11 +277,17 @@ h2v1_merged_upsample (j_decompress_ptr cinfo, outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr[RGB_FILLER] = RGB_FILLER_BYTE; +#endif outptr += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr0++); outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr[RGB_FILLER] = RGB_FILLER_BYTE; +#endif outptr += RGB_PIXELSIZE; } /* If image width is odd, do the last output column separately */ @@ -276,6 +301,9 @@ h2v1_merged_upsample (j_decompress_ptr cinfo, outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr[RGB_FILLER] = RGB_FILLER_BYTE; +#endif } } @@ -322,21 +350,33 @@ h2v2_merged_upsample (j_decompress_ptr cinfo, outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr0[RGB_FILLER] = RGB_FILLER_BYTE; +#endif outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = 
range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr0[RGB_FILLER] = RGB_FILLER_BYTE; +#endif outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr1[RGB_FILLER] = RGB_FILLER_BYTE; +#endif outptr1 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr1[RGB_FILLER] = RGB_FILLER_BYTE; +#endif outptr1 += RGB_PIXELSIZE; } /* If image width is odd, do the last output column separately */ @@ -350,10 +390,16 @@ h2v2_merged_upsample (j_decompress_ptr cinfo, outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr0[RGB_FILLER] = RGB_FILLER_BYTE; +#endif y = GETJSAMPLE(*inptr01); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; +#if RGB_PIXELSIZE == 4 + outptr1[RGB_FILLER] = RGB_FILLER_BYTE; +#endif } } @@ -370,6 +416,7 @@ GLOBAL(void) jinit_merged_upsampler (j_decompress_ptr cinfo) { my_upsample_ptr upsample; + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -382,19 +429,73 @@ jinit_merged_upsampler (j_decompress_ptr cinfo) if (cinfo->max_v_samp_factor == 2) { upsample->pub.upsample = merged_2v_upsample; - upsample->upmethod = h2v2_merged_upsample; +#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +#ifdef JDMERGE_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_merged_upsample_sse2)) { + upsample->upmethod = jpeg_h2v2_merged_upsample_sse2; + } else +#endif +#ifdef JDMERGE_MMX_SUPPORTED + if (simd & JSIMD_MMX) { + upsample->upmethod = 
jpeg_h2v2_merged_upsample_mmx; + } else +#endif +#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */ + { + upsample->upmethod = h2v2_merged_upsample; + build_ycc_rgb_table(cinfo); + } /* Allocate a spare row buffer */ upsample->spare_row = (JSAMPROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, (size_t) (upsample->out_row_width * SIZEOF(JSAMPLE))); } else { upsample->pub.upsample = merged_1v_upsample; - upsample->upmethod = h2v1_merged_upsample; +#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +#ifdef JDMERGE_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_merged_upsample_sse2)) { + upsample->upmethod = jpeg_h2v1_merged_upsample_sse2; + } else +#endif +#ifdef JDMERGE_MMX_SUPPORTED + if (simd & JSIMD_MMX) { + upsample->upmethod = jpeg_h2v1_merged_upsample_mmx; + } else +#endif +#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */ + { + upsample->upmethod = h2v1_merged_upsample; + build_ycc_rgb_table(cinfo); + } /* No spare row needed */ upsample->spare_row = NULL; } +} + - build_ycc_rgb_table(cinfo); +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +GLOBAL(unsigned int) +jpeg_simd_merged_upsampler (j_decompress_ptr cinfo) +{ + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); + +#if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +#ifdef JDMERGE_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_merged_upsample_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JDMERGE_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif +#endif /* RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 */ + + return JSIMD_NONE; } +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ #endif /* UPSAMPLE_MERGING_SUPPORTED */ diff --git a/jdmermmx.asm b/jdmermmx.asm new file mode 100644 index 0000000..4c88515 --- /dev/null +++ b/jdmermmx.asm @@ -0,0 +1,981 @@ +; +; jdmermmx.asm - merged upsampling/color conversion (MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jcolsamp.inc" + +%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +%ifdef UPSAMPLE_MERGING_SUPPORTED +%ifdef JDMERGE_MMX_SUPPORTED + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_mmx) + +EXTN(jconst_merged_upsample_mmx): + +PW_F0402 times 4 dw F_0_402 +PW_MF0228 times 4 dw -F_0_228 +PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 +PW_ONE times 4 dw 1 +PD_ONEHALF times 2 dd 1 << (SCALEBITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
+; +; GLOBAL(void) +; jpeg_h2v1_merged_upsample_mmx (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 ; slot that holds the pre-alignment stack pointer +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 3 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h2v1_merged_upsample_mmx) + +EXTN(jpeg_h2v1_merged_upsample_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax ; save it; the epilogue restores it with 'pop esp' + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] ; reserve wk[WK_NUM] below ebp + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, POINTER [cinfo(eax)] ; ecx=cinfo + mov ecx, JDIMENSION [jdstruct_output_width(ecx)] ; col + test ecx,ecx + jz near .return ; output_width == 0: nothing to do + + push ecx ; keep col while ecx is used as the row index + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + + pop ecx ; col + + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movq mm6, MMWORD [ebx] ; mm6=Cb(01234567) + movq mm7, MMWORD [edx] ; mm7=Cr(01234567) + + pxor mm1,mm1 ; mm1=(all 0's) + pcmpeqw mm3,mm3 ; mm3=(all 1's) + psllw mm3,7 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80} + + movq mm4,mm6 + punpckhbw
mm6,mm1 ; mm6=Cb(4567)=CbH + punpcklbw mm4,mm1 ; mm4=Cb(0123)=CbL + movq mm0,mm7 + punpckhbw mm7,mm1 ; mm7=Cr(4567)=CrH + punpcklbw mm0,mm1 ; mm0=Cr(0123)=CrL + + paddw mm6,mm3 ; CbH -= 128 (0xFF80 == -128 as a signed word) + paddw mm4,mm3 ; CbL -= 128 + paddw mm7,mm3 ; CrH -= 128 + paddw mm0,mm3 ; CrL -= 128 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movq mm5,mm6 ; mm5=CbH + movq mm2,mm4 ; mm2=CbL + paddw mm6,mm6 ; mm6=2*CbH + paddw mm4,mm4 ; mm4=2*CbL + movq mm1,mm7 ; mm1=CrH + movq mm3,mm0 ; mm3=CrL + paddw mm7,mm7 ; mm7=2*CrH + paddw mm0,mm0 ; mm0=2*CrL + + pmulhw mm6,[GOTOFF(eax,PW_MF0228)] ; mm6=(2*CbH * -FIX(0.22800)) + pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbL * -FIX(0.22800)) + pmulhw mm7,[GOTOFF(eax,PW_F0402)] ; mm7=(2*CrH * FIX(0.40200)) + pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrL * FIX(0.40200)) + + paddw mm6,[GOTOFF(eax,PW_ONE)] ; +1: round before the >>1 below + paddw mm4,[GOTOFF(eax,PW_ONE)] + psraw mm6,1 ; mm6=(CbH * -FIX(0.22800)) + psraw mm4,1 ; mm4=(CbL * -FIX(0.22800)) + paddw mm7,[GOTOFF(eax,PW_ONE)] ; +1: round before the >>1 below + paddw mm0,[GOTOFF(eax,PW_ONE)] + psraw mm7,1 ; mm7=(CrH * FIX(0.40200)) + psraw mm0,1 ; mm0=(CrL * FIX(0.40200)) + + paddw mm6,mm5 + paddw mm4,mm2 + paddw mm6,mm5 ; mm6=(CbH * FIX(1.77200))=(B-Y)H + paddw mm4,mm2 ; mm4=(CbL * FIX(1.77200))=(B-Y)L + paddw mm7,mm1 ; mm7=(CrH * FIX(1.40200))=(R-Y)H + paddw mm0,mm3 ; mm0=(CrL * FIX(1.40200))=(R-Y)L + + movq MMWORD [wk(0)], mm6 ; wk(0)=(B-Y)H + movq MMWORD [wk(1)], mm7 ; wk(1)=(R-Y)H + + movq mm6,mm5 ; mm6=CbH + movq mm7,mm2 ; mm7=CbL + punpcklwd mm5,mm1 ; mm5=(Cb4 Cr4 Cb5 Cr5) + punpckhwd mm6,mm1 ; mm6=(Cb6 Cr6 Cb7 Cr7) + pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] ; dwords: Cb*-FIX(0.344)+Cr*FIX(0.285) + pmaddwd mm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd mm2,mm3 ; mm2=(Cb0 Cr0 Cb1 Cr1) + punpckhwd mm7,mm3 ; mm7=(Cb2 Cr2 Cb3 Cr3) + pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] ; dwords: Cb*-FIX(0.344)+Cr*FIX(0.285) + pmaddwd mm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd mm5,[GOTOFF(eax,PD_ONEHALF)] ; add the rounding term before descaling + paddd mm6,[GOTOFF(eax,PD_ONEHALF)] + psrad mm5,SCALEBITS ; drop the SCALEBITS fraction bits + psrad mm6,SCALEBITS + paddd
mm2,[GOTOFF(eax,PD_ONEHALF)] + paddd mm7,[GOTOFF(eax,PD_ONEHALF)] + psrad mm2,SCALEBITS + psrad mm7,SCALEBITS + + packssdw mm5,mm6 ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw mm2,mm7 ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw mm5,mm1 ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw mm2,mm3 ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movq MMWORD [wk(2)], mm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr (pass 1 uses the L halves still in mm0/mm2/mm4, pass 2 the H halves from wk[]) + jmp short .Yloop_1st + alignx 16,7 + +.Yloop_2nd: + movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H + movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H + movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H + alignx 16,7 + +.Yloop_1st: + movq mm7, MMWORD [esi] ; mm7=Y(01234567) + + pcmpeqw mm6,mm6 ; mm6=(all 1's) + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + pand mm6,mm7 ; mm6=Y(0246)=YE + psrlw mm7,BYTE_BIT ; mm7=Y(1357)=YO + + movq mm1,mm0 ; mm1=mm0=(R-Y)(L/H) + movq mm3,mm2 ; mm3=mm2=(G-Y)(L/H) + movq mm5,mm4 ; mm5=mm4=(B-Y)(L/H) + + paddw mm0,mm6 ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6) + paddw mm1,mm7 ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7) + packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) + packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) + + paddw mm2,mm6 ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6) + paddw mm3,mm7 ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7) + packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + + paddw mm4,mm6 ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6) + paddw mm5,mm7 ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7) + packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) + punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) + + movq mmG,mmA
+ movq mmH,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03) + punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) + + psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) + psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) + + movq mmC,mmD + movq mmB,mmD + punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) + punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) + + psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) + + movq mmF,mmE + punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) + punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) + + punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) + punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) + punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) + + cmp ecx, byte SIZEOF_MMWORD ; fewer than SIZEOF_MMWORD pixels left? + jb short .column_st16 ; yes: store only the partial tail + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + + sub ecx, byte SIZEOF_MMWORD + jz short .endcolumn + + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + add esi, byte SIZEOF_MMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE (ecx = output bytes left) + cmp ecx, byte 2*SIZEOF_MMWORD + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq mmA,mmC ; mmA = bytes not stored yet + sub ecx, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD + jmp short .column_st4 +.column_st8: + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmE ; mmA = bytes not stored yet + sub ecx, byte SIZEOF_MMWORD + add edi, byte SIZEOF_MMWORD +.column_st4: + movd eax,mmA + cmp ecx, byte SIZEOF_DWORD + jb short .column_st2 + mov DWORD [edi+0*SIZEOF_DWORD], eax + psrlq mmA,DWORD_BIT + movd eax,mmA + sub ecx, byte SIZEOF_DWORD + add edi, byte SIZEOF_DWORD +.column_st2: + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi+0*SIZEOF_WORD], ax + shr
eax,WORD_BIT + sub ecx, byte SIZEOF_WORD + add edi, byte SIZEOF_WORD +.column_st1: + cmp ecx, byte SIZEOF_BYTE + jb short .endcolumn + mov BYTE [edi+0*SIZEOF_BYTE], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **), filler bytes = 0xFF + pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **), filler bytes = 0xFF +%else + pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **), filler bytes = 0x00 + pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **), filler bytes = 0x00 +%endif + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) + punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) + punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) + + movq mmC,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) + punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) + movq mmG,mmB + punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) + punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) + + movq mmD,mmA + punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) + punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) + movq mmH,mmC + punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35) + punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) + + cmp ecx, byte SIZEOF_MMWORD ; fewer than SIZEOF_MMWORD pixels left? + jb short .column_st16 ; yes: store only the partial tail + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + movq MMWORD [edi+3*SIZEOF_MMWORD], mmH + + sub ecx, byte SIZEOF_MMWORD + jz short .endcolumn + + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + add esi, byte SIZEOF_MMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + cmp ecx, byte SIZEOF_MMWORD/2 ; ecx still counts pixels here (one mmword holds 2 pixels) + jb short .column_st8 +
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq mmA,mmC + movq mmD,mmH + sub ecx, byte SIZEOF_MMWORD/2 + add edi, byte 2*SIZEOF_MMWORD +.column_st8: + cmp ecx, byte SIZEOF_MMWORD/4 + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA,mmD + sub ecx, byte SIZEOF_MMWORD/4 + add edi, byte 1*SIZEOF_MMWORD +.column_st4: + cmp ecx, byte SIZEOF_MMWORD/8 + jb short .endcolumn + movd DWORD [edi+0*SIZEOF_DWORD], mmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%ifndef USE_DEDICATED_H2V2_MERGED_UPSAMPLE_MMX + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jpeg_h2v2_merged_upsample_mmx (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + + align 16 + global EXTN(jpeg_h2v2_merged_upsample_mmx) + +EXTN(jpeg_h2v2_merged_upsample_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov eax, POINTER [cinfo(ebp)] ; eax=cinfo + + mov edi, JSAMPIMAGE [input_buf(ebp)] + mov ecx, JDIMENSION [in_row_group_ctr(ebp)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(ebp)] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] ; pre-offset by ctr rows; the h2v1 routine indexes by ctr again -> luma row 2*ctr + + push edx ; inptr2 (temporary input_buf[2]) + push ebx ; inptr1 (temporary input_buf[1]) + push esi ; inptr00 (temporary input_buf[0]) + mov ebx,esp ; ebx = temporary 3-entry input_buf on the stack + +
push edi ; output_buf (outptr0) + push ecx ; in_row_group_ctr + push ebx ; input_buf + push eax ; cinfo + + call near EXTN(jpeg_h2v1_merged_upsample_mmx) ; first row: luma row 2*ctr -> output_buf[0] + + add esi, byte SIZEOF_JSAMPROW ; inptr01 + add edi, byte SIZEOF_JSAMPROW ; outptr1 + mov POINTER [ebx+0*SIZEOF_POINTER], esi ; update temporary input_buf[0] + mov POINTER [ebx-1*SIZEOF_POINTER], edi ; update the pushed output_buf argument + + call near EXTN(jpeg_h2v1_merged_upsample_mmx) ; second row: luma row 2*ctr+1 -> output_buf[1] + + add esp, byte 7*SIZEOF_DWORD ; drop the 4 arguments and the 3-entry temporary input_buf + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +%else ; USE_DEDICATED_H2V2_MERGED_UPSAMPLE_MMX + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. +; +; GLOBAL(void) +; jpeg_h2v2_merged_upsample_mmx (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 ; slot that holds the pre-alignment stack pointer +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 10 +%define inptr1 wk(0)-SIZEOF_JSAMPROW ; JSAMPROW inptr1 +%define inptr2 inptr1-SIZEOF_JSAMPROW ; JSAMPROW inptr2 +%define gotptr inptr2-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h2v2_merged_upsample_mmx) + +EXTN(jpeg_h2v2_merged_upsample_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax ; save it; the epilogue restores it with 'pop esp' + mov ebp,esp ; ebp = aligned ebp + lea esp, [inptr2] ; reserve wk[WK_NUM] plus the inptr1/inptr2 slots + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, POINTER [cinfo(eax)] ; ecx=cinfo + mov ecx, JDIMENSION
[jdstruct_output_width(ecx)] ; col + test ecx,ecx + jz near .return ; output_width == 0: nothing to do + + push ecx ; keep col while ecx is used as the row index + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, JSAMPROW [esi+(ecx*2+0)*SIZEOF_JSAMPROW] ; inptr00 + mov esi, JSAMPROW [esi+(ecx*2+1)*SIZEOF_JSAMPROW] ; inptr01 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + + pop ecx ; col + push eax ; inptr00 (popped again at the top of .columnloop) + push esi ; inptr01 (popped again at the top of .columnloop) + + mov esi, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movq mm6, MMWORD [ebx] ; mm6=Cb(01234567) + movq mm7, MMWORD [edx] ; mm7=Cr(01234567) + + mov JSAMPROW [inptr1], ebx ; inptr1 (spilled: ebx is reused for inptr00) + mov JSAMPROW [inptr2], edx ; inptr2 (spilled: edx is reused for inptr01) + pop edx ; edx=inptr01 + pop ebx ; ebx=inptr00 + + pxor mm1,mm1 ; mm1=(all 0's) + pcmpeqw mm3,mm3 ; mm3=(all 1's) + psllw mm3,7 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80} + + movq mm4,mm6 + punpckhbw mm6,mm1 ; mm6=Cb(4567)=CbH + punpcklbw mm4,mm1 ; mm4=Cb(0123)=CbL + movq mm0,mm7 + punpckhbw mm7,mm1 ; mm7=Cr(4567)=CrH + punpcklbw mm0,mm1 ; mm0=Cr(0123)=CrL + + paddw mm6,mm3 ; CbH -= 128 (0xFF80 == -128 as a signed word) + paddw mm4,mm3 ; CbL -= 128 + paddw mm7,mm3 ; CrH -= 128 + paddw mm0,mm3 ; CrL -= 128 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movq mm5,mm6 ; mm5=CbH + movq mm2,mm4 ; mm2=CbL + paddw mm6,mm6 ; mm6=2*CbH + paddw mm4,mm4 ; mm4=2*CbL + movq mm1,mm7 ; mm1=CrH + movq mm3,mm0 ; mm3=CrL + paddw mm7,mm7 ; mm7=2*CrH + paddw mm0,mm0 ; mm0=2*CrL + + pmulhw mm6,[GOTOFF(eax,PW_MF0228)] ; mm6=(2*CbH * -FIX(0.22800)) + pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbL *
-FIX(0.22800)) + pmulhw mm7,[GOTOFF(eax,PW_F0402)] ; mm7=(2*CrH * FIX(0.40200)) + pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrL * FIX(0.40200)) + + paddw mm6,[GOTOFF(eax,PW_ONE)] ; +1: round before the >>1 below + paddw mm4,[GOTOFF(eax,PW_ONE)] + psraw mm6,1 ; mm6=(CbH * -FIX(0.22800)) + psraw mm4,1 ; mm4=(CbL * -FIX(0.22800)) + paddw mm7,[GOTOFF(eax,PW_ONE)] ; +1: round before the >>1 below + paddw mm0,[GOTOFF(eax,PW_ONE)] + psraw mm7,1 ; mm7=(CrH * FIX(0.40200)) + psraw mm0,1 ; mm0=(CrL * FIX(0.40200)) + + paddw mm6,mm5 + paddw mm4,mm2 + paddw mm6,mm5 ; mm6=(CbH * FIX(1.77200))=(B-Y)H + paddw mm4,mm2 ; mm4=(CbL * FIX(1.77200))=(B-Y)L + paddw mm7,mm1 ; mm7=(CrH * FIX(1.40200))=(R-Y)H + paddw mm0,mm3 ; mm0=(CrL * FIX(1.40200))=(R-Y)L + + movq MMWORD [wk(0)], mm6 ; wk(0)=(B-Y)H + movq MMWORD [wk(1)], mm7 ; wk(1)=(R-Y)H + + movq mm6,mm5 ; mm6=CbH + movq mm7,mm2 ; mm7=CbL + punpcklwd mm5,mm1 ; mm5=(Cb4 Cr4 Cb5 Cr5) + punpckhwd mm6,mm1 ; mm6=(Cb6 Cr6 Cb7 Cr7) + pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] ; dwords: Cb*-FIX(0.344)+Cr*FIX(0.285) + pmaddwd mm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd mm2,mm3 ; mm2=(Cb0 Cr0 Cb1 Cr1) + punpckhwd mm7,mm3 ; mm7=(Cb2 Cr2 Cb3 Cr3) + pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] ; dwords: Cb*-FIX(0.344)+Cr*FIX(0.285) + pmaddwd mm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd mm5,[GOTOFF(eax,PD_ONEHALF)] ; add the rounding term before descaling + paddd mm6,[GOTOFF(eax,PD_ONEHALF)] + psrad mm5,SCALEBITS ; drop the SCALEBITS fraction bits + psrad mm6,SCALEBITS + paddd mm2,[GOTOFF(eax,PD_ONEHALF)] + paddd mm7,[GOTOFF(eax,PD_ONEHALF)] + psrad mm2,SCALEBITS + psrad mm7,SCALEBITS + + packssdw mm5,mm6 ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw mm2,mm7 ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw mm5,mm1 ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw mm2,mm3 ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movq MMWORD [wk(2)], mm5 ; wk(2)=(G-Y)H + + mov ah,2 ; YHctr (pass 1 uses the L chroma halves, pass 2 the H halves from wk[]) + jmp short .YHloop_1st + alignx 16,7 + +.YHloop_2nd: + movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H + movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H + movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H + alignx 16,7 + +.YHloop_1st: + movq MMWORD [wk(3)], mm0 ; wk(3)=(R-Y)(L/H) + movq MMWORD [wk(4)], mm2 ; wk(4)=(G-Y)(L/H) + movq MMWORD [wk(5)], mm4 ; wk(5)=(B-Y)(L/H) + + movq mm7, MMWORD [ebx] ; mm7=Y(01234567) of the first luma row (inptr00) + + mov al,2 ; YVctr (pass 1 = row inptr00, pass 2 = row inptr01)
+ jmp short .YVloop_1st + alignx 16,7 + +.YVloop_2nd: + movq mm0, MMWORD [wk(3)] ; mm0=(R-Y)(L/H) + movq mm2, MMWORD [wk(4)] ; mm2=(G-Y)(L/H) + movq mm4, MMWORD [wk(5)] ; mm4=(B-Y)(L/H) + + movq mm7, MMWORD [edx] ; mm7=Y(01234567) of the second luma row (inptr01) + alignx 16,7 + +.YVloop_1st: + pcmpeqw mm6,mm6 ; mm6=(all 1's) + psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} + pand mm6,mm7 ; mm6=Y(0246)=YE + psrlw mm7,BYTE_BIT ; mm7=Y(1357)=YO + + movq mm1,mm0 ; mm1=mm0=(R-Y)(L/H) + movq mm3,mm2 ; mm3=mm2=(G-Y)(L/H) + movq mm5,mm4 ; mm5=mm4=(B-Y)(L/H) + + paddw mm0,mm6 ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6) + paddw mm1,mm7 ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7) + packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) + packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) + + paddw mm2,mm6 ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6) + paddw mm3,mm7 ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7) + packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + + paddw mm4,mm6 ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6) + paddw mm5,mm7 ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7) + packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) + punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) + + movq mmG,mmA + movq mmH,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03) + punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) + + psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) + psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) + + movq mmC,mmD + movq mmB,mmD + punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) + punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) + + psrlq mmB,2*BYTE_BIT ;
mmB=(13 23 15 25 17 27 -- --) + + movq mmF,mmE + punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) + punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) + + punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) + punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) + punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) + + dec al ; YVctr + jz short .YVloop_break + + movq MMWORD [wk(6)], mmA ; wk(6..8) = packed RGB bytes of the first row + movq MMWORD [wk(7)], mmE + movq MMWORD [wk(8)], mmC + + jmp near .YVloop_2nd + alignx 16,7 + +.YVloop_break: + movq mmH, MMWORD [wk(6)] ; mmH/mmB/mmD = first-row bytes (go to outptr0) + movq mmB, MMWORD [wk(7)] + movq mmD, MMWORD [wk(8)] + + cmp ecx, byte SIZEOF_MMWORD ; fewer than SIZEOF_MMWORD pixels left? + jb short .column_st16 ; yes: store only the partial tail + + movq MMWORD [esi+0*SIZEOF_MMWORD], mmH + movq MMWORD [esi+1*SIZEOF_MMWORD], mmB + movq MMWORD [esi+2*SIZEOF_MMWORD], mmD + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + + sub ecx, byte SIZEOF_MMWORD + jz near .endcolumn + + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr0 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr1 + add ebx, byte SIZEOF_MMWORD ; inptr00 + add edx, byte SIZEOF_MMWORD ; inptr01 + dec ah ; YHctr + jnz near .YHloop_2nd + + push ebx ; inptr00 + push edx ; inptr01 + mov ebx, JSAMPROW [inptr1] ; ebx=inptr1 + mov edx, JSAMPROW [inptr2] ; edx=inptr2 + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE (ecx = output bytes left per row) + cmp ecx, byte 2*SIZEOF_MMWORD + jb short .column_st8 + movq MMWORD [esi+0*SIZEOF_MMWORD], mmH + movq MMWORD [esi+1*SIZEOF_MMWORD], mmB + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq mmH,mmD + movq mmA,mmC + sub ecx, byte 2*SIZEOF_MMWORD + add esi, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD + jmp short .column_st4 +.column_st8: + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st4 + movq MMWORD [esi+0*SIZEOF_MMWORD], mmH + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq
mmH,mmB + movq mmA,mmE + sub ecx, byte SIZEOF_MMWORD + add esi, byte SIZEOF_MMWORD + add edi, byte SIZEOF_MMWORD +.column_st4: + movd eax,mmH + movd edx,mmA + cmp ecx, byte SIZEOF_DWORD + jb short .column_st2 + mov DWORD [esi+0*SIZEOF_DWORD], eax + mov DWORD [edi+0*SIZEOF_DWORD], edx + psrlq mmH,DWORD_BIT + psrlq mmA,DWORD_BIT + movd eax,mmH + movd edx,mmA + sub ecx, byte SIZEOF_DWORD + add esi, byte SIZEOF_DWORD + add edi, byte SIZEOF_DWORD +.column_st2: + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [esi+0*SIZEOF_WORD], ax + mov WORD [edi+0*SIZEOF_WORD], dx + shr eax,WORD_BIT + shr edx,WORD_BIT + sub ecx, byte SIZEOF_WORD + add esi, byte SIZEOF_WORD + add edi, byte SIZEOF_WORD +.column_st1: + cmp ecx, byte SIZEOF_BYTE + jb short .endcolumn + mov BYTE [esi+0*SIZEOF_BYTE], al + mov BYTE [edi+0*SIZEOF_BYTE], dl + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **), filler bytes = 0xFF + pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **), filler bytes = 0xFF +%else + pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **), filler bytes = 0x00 + pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **), filler bytes = 0x00 +%endif + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) + + punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) + punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) + punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) + + movq mmC,mmA + punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) + punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) + movq mmG,mmB + punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) + punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) + + movq mmD,mmA + punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) + punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) + movq mmH,mmC + punpckldq mmC,mmG ; mmC=(04 14 24
34 05 15 25 35) + punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) + + dec al ; YVctr + jz short .YVloop_break + + movq MMWORD [wk(6)], mmA ; wk(6..9) = packed RGBX bytes of the first row + movq MMWORD [wk(7)], mmD + movq MMWORD [wk(8)], mmC + movq MMWORD [wk(9)], mmH + + jmp near .YVloop_2nd + alignx 16,7 + +.YVloop_break: + movq mmE, MMWORD [wk(6)] ; mmE/mmF/mmB/mmG = first-row bytes (go to outptr0) + movq mmF, MMWORD [wk(7)] + movq mmB, MMWORD [wk(8)] + movq mmG, MMWORD [wk(9)] + + cmp ecx, byte SIZEOF_MMWORD ; fewer than SIZEOF_MMWORD pixels left? + jb short .column_st16 ; yes: store only the partial tail + + movq MMWORD [esi+0*SIZEOF_MMWORD], mmE + movq MMWORD [esi+1*SIZEOF_MMWORD], mmF + movq MMWORD [esi+2*SIZEOF_MMWORD], mmB + movq MMWORD [esi+3*SIZEOF_MMWORD], mmG + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + movq MMWORD [edi+3*SIZEOF_MMWORD], mmH + + sub ecx, byte SIZEOF_MMWORD + jz short .endcolumn + + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr0 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr1 + add ebx, byte SIZEOF_MMWORD ; inptr00 + add edx, byte SIZEOF_MMWORD ; inptr01 + dec ah ; YHctr + jnz near .YHloop_2nd + + push ebx ; inptr00 + push edx ; inptr01 + mov ebx, JSAMPROW [inptr1] ; ebx=inptr1 + mov edx, JSAMPROW [inptr2] ; edx=inptr2 + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st16: + cmp ecx, byte SIZEOF_MMWORD/2 ; ecx still counts pixels here (one mmword holds 2 pixels) + jb short .column_st8 + movq MMWORD [esi+0*SIZEOF_MMWORD], mmE + movq MMWORD [esi+1*SIZEOF_MMWORD], mmF + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq mmE,mmB + movq mmF,mmG + movq mmA,mmC + movq mmD,mmH + sub ecx, byte SIZEOF_MMWORD/2 + add esi, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD +.column_st8: + cmp ecx, byte SIZEOF_MMWORD/4 + jb short .column_st4 + movq MMWORD [esi+0*SIZEOF_MMWORD], mmE + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmE,mmF + movq mmA,mmD + sub ecx, byte SIZEOF_MMWORD/4 + add esi, byte 1*SIZEOF_MMWORD + add edi, byte 1*SIZEOF_MMWORD +.column_st4: + cmp ecx, byte
SIZEOF_MMWORD/8 + jb short .endcolumn + movd DWORD [esi+0*SIZEOF_DWORD], mmE + movd DWORD [edi+0*SIZEOF_DWORD], mmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; !USE_DEDICATED_H2V2_MERGED_UPSAMPLE_MMX + +%endif ; JDMERGE_MMX_SUPPORTED +%endif ; UPSAMPLE_MERGING_SUPPORTED +%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 diff --git a/jdmerss2.asm b/jdmerss2.asm new file mode 100644 index 0000000..b6f51c8 --- /dev/null +++ b/jdmerss2.asm @@ -0,0 +1,1272 @@ +; +; jdmerss2.asm - merged upsampling/color conversion (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jcolsamp.inc" + +%if RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 +%ifdef UPSAMPLE_MERGING_SUPPORTED +%ifdef JDMERGE_SSE2_SUPPORTED + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 ; fraction bits of the fixed-point constants below: FIX(x) = x * 2^16, rounded + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) = 26345 (fits a signed word) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) = 18734 (fits a signed word) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) = 14942 (fits a signed word) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_merged_upsample_sse2) + +EXTN(jconst_merged_upsample_sse2): + +PW_F0402 times 8 dw F_0_402 ; 8 words of FIX(0.40200), pmulhw operand for Cr +PW_MF0228 times 8 dw -F_0_228 ; 8 words of -FIX(0.22800), pmulhw operand for Cb +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 ; 4 (-FIX(0.34414), FIX(0.28586)) word pairs, pmaddwd operand for interleaved (Cb,Cr) +PW_ONE times 8 dw 1 ; rounding term added before the 'psraw ..,1' +PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) ; 0.5 in SCALEBITS fixed point, added before 'psrad ..,SCALEBITS' + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+; +; GLOBAL(void) +; jpeg_h2v1_merged_upsample_sse2 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 3 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h2v1_merged_upsample_sse2) + +EXTN(jpeg_h2v1_merged_upsample_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, POINTER [cinfo(eax)] + mov ecx, JDIMENSION [jdstruct_output_width(ecx)] ; col + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + + pop ecx ; col + + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF) + movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF) + + pxor xmm1,xmm1 ; xmm1=(all 0's) + pcmpeqw xmm3,xmm3 + psllw xmm3,7 ; xmm3={0xFF80 0xFF80 
0xFF80 0xFF80 ..} + + movdqa xmm4,xmm6 + punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH + punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL + movdqa xmm0,xmm7 + punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH + punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL + + paddw xmm6,xmm3 + paddw xmm4,xmm3 + paddw xmm7,xmm3 + paddw xmm0,xmm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm5,xmm6 ; xmm5=CbH + movdqa xmm2,xmm4 ; xmm2=CbL + paddw xmm6,xmm6 ; xmm6=2*CbH + paddw xmm4,xmm4 ; xmm4=2*CbL + movdqa xmm1,xmm7 ; xmm1=CrH + movdqa xmm3,xmm0 ; xmm3=CrL + paddw xmm7,xmm7 ; xmm7=2*CrH + paddw xmm0,xmm0 ; xmm0=2*CrL + + pmulhw xmm6,[GOTOFF(eax,PW_MF0228)] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7,[GOTOFF(eax,PW_F0402)] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrL * FIX(0.40200)) + + paddw xmm6,[GOTOFF(eax,PW_ONE)] + paddw xmm4,[GOTOFF(eax,PW_ONE)] + psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) + psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) + paddw xmm7,[GOTOFF(eax,PW_ONE)] + paddw xmm0,[GOTOFF(eax,PW_ONE)] + psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) + psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) + + paddw xmm6,xmm5 + paddw xmm4,xmm2 + paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H + paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L + paddw xmm7,xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H + paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L + + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H + + movdqa xmm6,xmm5 + movdqa xmm7,xmm2 + punpcklwd xmm5,xmm1 + punpckhwd xmm6,xmm1 + pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm2,xmm3 + punpckhwd xmm7,xmm3 + pmaddwd 
xmm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm6,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm5,SCALEBITS + psrad xmm6,SCALEBITS + paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm7,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm2,SCALEBITS + psrad xmm7,SCALEBITS + + packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H + + mov al,2 ; Yctr + jmp short .Yloop_1st + alignx 16,7 + +.Yloop_2nd: + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H + alignx 16,7 + +.Yloop_1st: + movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF) + + pcmpeqw xmm6,xmm6 + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE + psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO + + movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) + movdqa xmm3,xmm2 ; xmm3=xmm2=(G-Y)(L/H) + movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) + + paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) + paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) + paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) + paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; 
xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 
28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + jmp short .out0 +.out1: ; --(unaligned)----------------- + pcmpeqb xmmH,xmmH ; xmmH=(all 1's) + maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF + add edi, byte SIZEOF_XMMWORD ; outptr +.out0: + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + pcmpeqb xmmH,xmmH ; xmmH=(all 1's) + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmF + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + mov eax,ecx + xor ecx, byte 0x0F + shl ecx, 2 + movd xmmB,ecx + psrlq xmmH,4 + pcmpeqb xmmE,xmmE + psrlq xmmH,xmmB + psrlq xmmE,xmmB + punpcklbw xmmE,xmmH + ; ---------------- + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0 + add eax,ecx + cmp eax, byte 
SIZEOF_XMMWORD + ja short .adj0 + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx + movdqa xmmG,xmmA + movdqa xmmC,xmmE + pslldq xmmA, SIZEOF_XMMWORD/2 + pslldq xmmE, SIZEOF_XMMWORD/2 + movd xmmD,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1 + movd xmmF,ecx + psllq xmmA,xmmF + psllq xmmE,xmmF + jmp short .adj0 +.adj1: neg ecx + movd xmmF,ecx + psrlq xmmA,xmmF + psrlq xmmE,xmmF + psllq xmmG,xmmD + psllq xmmC,xmmD + por xmmA,xmmG + por xmmE,xmmC +.adj0: ; ---------------- + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 
15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + jmp short .out0 +.out1: ; --(unaligned)----------------- + pcmpeqb xmmE,xmmE ; xmmE=(all 1's) + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH + add edi, byte SIZEOF_XMMWORD ; outptr +.out0: + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + pcmpeqb xmmE,xmmE ; xmmE=(all 1's) + cmp ecx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmC + movdqa xmmD,xmmH + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA,xmmD + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + cmp ecx, byte SIZEOF_XMMWORD/16 + jb short .endcolumn + mov eax,ecx + 
xor ecx, byte 0x03 + inc ecx + shl ecx, 4 + movd xmmF,ecx + psrlq xmmE,xmmF + punpcklbw xmmE,xmmE + ; ---------------- + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0 + lea eax, [ecx+eax*4] ; RGB_PIXELSIZE + cmp eax, byte SIZEOF_XMMWORD + ja short .adj0 + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx + movdqa xmmB,xmmA + movdqa xmmG,xmmE + pslldq xmmA, SIZEOF_XMMWORD/2 + pslldq xmmE, SIZEOF_XMMWORD/2 + movd xmmC,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1 + movd xmmH,ecx + psllq xmmA,xmmH + psllq xmmE,xmmH + jmp short .adj0 +.adj1: neg ecx + movd xmmH,ecx + psrlq xmmA,xmmH + psrlq xmmE,xmmH + psllq xmmB,xmmC + psllq xmmG,xmmC + por xmmA,xmmB + por xmmE,xmmG +.adj0: ; ---------------- + maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%ifndef USE_DEDICATED_H2V2_MERGED_UPSAMPLE_SSE2 + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. 
+; (Wrapper variant: assembled when USE_DEDICATED_H2V2_MERGED_UPSAMPLE_SSE2 is not defined.)
+; GLOBAL(void)
+; jpeg_h2v2_merged_upsample_sse2 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+; Calls jpeg_h2v1_merged_upsample_sse2 twice; the 2nd call reuses the same stack arguments with [ebx+0] and output_buf each advanced by SIZEOF_JSAMPROW.
+
+%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo
+%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr
+%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
+
+        align 16
+        global EXTN(jpeg_h2v2_merged_upsample_sse2)
+
+EXTN(jpeg_h2v2_merged_upsample_sse2):
+        push ebp
+        mov ebp,esp                     ; plain frame: arguments are addressed as (ebp)+8 .. (ebp)+20
+        push ebx
+; push ecx ; need not be preserved
+; push edx ; need not be preserved
+        push esi
+        push edi
+
+        mov eax, POINTER [cinfo(ebp)]   ; eax = cinfo
+
+        mov edi, JSAMPIMAGE [input_buf(ebp)]            ; edi = input_buf
+        mov ecx, JDIMENSION [in_row_group_ctr(ebp)]     ; ecx = in_row_group_ctr
+        mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]   ; esi = input_buf[0]
+        mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]   ; ebx = input_buf[1]
+        mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]   ; edx = input_buf[2]
+        mov edi, JSAMPARRAY [output_buf(ebp)]           ; edi = output_buf
+        lea esi, [esi+ecx*SIZEOF_JSAMPROW]              ; esi = input_buf[0] + in_row_group_ctr*SIZEOF_JSAMPROW
+
+        push edx ; inptr2
+        push ebx ; inptr1
+        push esi ; inptr00
+        mov ebx,esp                     ; ebx -> the three pointers just pushed ([ebx+0] holds esi); passed below as input_buf
+
+        push edi ; output_buf (outptr0)
+        push ecx ; in_row_group_ctr
+        push ebx ; input_buf
+        push eax ; cinfo
+
+        call near EXTN(jpeg_h2v1_merged_upsample_sse2)  ; 1st call
+
+        add esi, byte SIZEOF_JSAMPROW ; inptr01
+        add edi, byte SIZEOF_JSAMPROW ; outptr1
+        mov POINTER [ebx+0*SIZEOF_POINTER], esi         ; overwrite the pushed inptr00 entry with the advanced esi
+        mov POINTER [ebx-1*SIZEOF_POINTER], edi         ; overwrite the slot just below ebx (the pushed output_buf) with the advanced edi; assumes SIZEOF_POINTER equals the push width - TODO confirm
+
+        call near EXTN(jpeg_h2v1_merged_upsample_sse2)  ; 2nd call: argument slots are reused, not re-pushed. NOTE(review): relies on the callee leaving those slots and ebx/esi/edi intact - confirm
+
+        add esp, byte 7*SIZEOF_DWORD    ; discard the 7 values pushed above (3 pointers + 4 arguments)
+
+        pop edi
+        pop esi
+; pop edx ; need not be preserved
+; pop ecx ; need not be preserved
+        pop ebx
+        pop ebp
+        ret
+
+%else ; USE_DEDICATED_H2V2_MERGED_UPSAMPLE_SSE2
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+; +; GLOBAL(void) +; jpeg_h2v2_merged_upsample_sse2 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 10 +%define inptr1 wk(0)-SIZEOF_JSAMPROW ; JSAMPROW inptr1 +%define inptr2 inptr1-SIZEOF_JSAMPROW ; JSAMPROW inptr2 +%define gotptr inptr2-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h2v2_merged_upsample_sse2) + +EXTN(jpeg_h2v2_merged_upsample_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [inptr2] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, POINTER [cinfo(eax)] + mov ecx, JDIMENSION [jdstruct_output_width(ecx)] ; col + test ecx,ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, JSAMPROW [esi+(ecx*2+0)*SIZEOF_JSAMPROW] ; inptr00 + mov esi, JSAMPROW [esi+(ecx*2+1)*SIZEOF_JSAMPROW] ; inptr01 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + + pop ecx ; col + push eax ; inptr00 + push esi ; inptr01 + + mov esi, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; 
outptr1 + alignx 16,7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF) + movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF) + + mov JSAMPROW [inptr1], ebx ; inptr1 + mov JSAMPROW [inptr2], edx ; inptr2 + pop edx ; edx=inptr01 + pop ebx ; ebx=inptr00 + + pxor xmm1,xmm1 ; xmm1=(all 0's) + pcmpeqw xmm3,xmm3 + psllw xmm3,7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + movdqa xmm4,xmm6 + punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH + punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL + movdqa xmm0,xmm7 + punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH + punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL + + paddw xmm6,xmm3 + paddw xmm4,xmm3 + paddw xmm7,xmm3 + paddw xmm0,xmm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm5,xmm6 ; xmm5=CbH + movdqa xmm2,xmm4 ; xmm2=CbL + paddw xmm6,xmm6 ; xmm6=2*CbH + paddw xmm4,xmm4 ; xmm4=2*CbL + movdqa xmm1,xmm7 ; xmm1=CrH + movdqa xmm3,xmm0 ; xmm3=CrL + paddw xmm7,xmm7 ; xmm7=2*CrH + paddw xmm0,xmm0 ; xmm0=2*CrL + + pmulhw xmm6,[GOTOFF(eax,PW_MF0228)] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7,[GOTOFF(eax,PW_F0402)] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrL * FIX(0.40200)) + + paddw xmm6,[GOTOFF(eax,PW_ONE)] + paddw xmm4,[GOTOFF(eax,PW_ONE)] + psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) + psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) + paddw xmm7,[GOTOFF(eax,PW_ONE)] + paddw xmm0,[GOTOFF(eax,PW_ONE)] + psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) + psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) + + paddw xmm6,xmm5 + paddw xmm4,xmm2 + paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H + paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L + paddw xmm7,xmm1 ; xmm7=(CrH * 
FIX(1.40200))=(R-Y)H + paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L + + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H + + movdqa xmm6,xmm5 + movdqa xmm7,xmm2 + punpcklwd xmm5,xmm1 + punpckhwd xmm6,xmm1 + pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm6,[GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm2,xmm3 + punpckhwd xmm7,xmm3 + pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm7,[GOTOFF(eax,PW_MF0344_F0285)] + + paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm6,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm5,SCALEBITS + psrad xmm6,SCALEBITS + paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] + paddd xmm7,[GOTOFF(eax,PD_ONEHALF)] + psrad xmm2,SCALEBITS + psrad xmm7,SCALEBITS + + packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H + + mov ah,2 ; YHctr + jmp short .YHloop_1st + alignx 16,7 + +.YHloop_2nd: + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H + alignx 16,7 + +.YHloop_1st: + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(R-Y)(L/H) + movdqa XMMWORD [wk(4)], xmm2 ; wk(4)=(G-Y)(L/H) + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=(B-Y)(L/H) + + movdqa xmm7, XMMWORD [ebx] ; xmm7=Y(0123456789ABCDEF) + + mov al,2 ; YVctr + jmp short .YVloop_1st + alignx 16,7 + +.YVloop_2nd: + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=(R-Y)(L/H) + movdqa xmm2, XMMWORD [wk(4)] ; xmm2=(G-Y)(L/H) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(B-Y)(L/H) + + movdqa xmm7, XMMWORD [edx] ; xmm7=Y(0123456789ABCDEF) + alignx 16,7 + +.YVloop_1st: + pcmpeqw xmm6,xmm6 + psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE + psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO + + movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) + movdqa xmm3,xmm2 ; 
xmm3=xmm2=(G-Y)(L/H) + movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) + + paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) + paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) + packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) + paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) + packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) + paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) + packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG,xmmA + movdqa xmmH,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC,xmmD + movdqa xmmB,xmmD + punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF,xmmE + punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 
2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB,xmmE + punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB,xmmF + punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + dec al ; YVctr + jz short .YVloop_break + + movdqa XMMWORD [wk(6)], xmmA + movdqa XMMWORD [wk(7)], xmmD + movdqa XMMWORD [wk(8)], xmmF + + jmp near .YVloop_2nd + alignx 16,7 + +.YVloop_break: + movdqa xmmH, XMMWORD [wk(6)] + movdqa xmmB, XMMWORD [wk(7)] + movdqa xmmE, XMMWORD [wk(8)] + + pcmpeqb xmmG,xmmG ; xmmG=(all 1's) + + cmp ecx, byte SIZEOF_XMMWORD + jb near .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out01 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr1 + jmp short .out00 +.out01: ; --(unaligned)----------------- + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr1 + maskmovdqu xmmD,xmmG ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr1 + maskmovdqu xmmF,xmmG ; movntdqu XMMWORD [edi], xmmF + add edi, byte SIZEOF_XMMWORD ; outptr1 +.out00: + test esi, 
SIZEOF_XMMWORD-1 + jnz short .out11 + ; --(aligned)------------------- + movntdq XMMWORD [esi+0*SIZEOF_XMMWORD], xmmH + movntdq XMMWORD [esi+1*SIZEOF_XMMWORD], xmmB + movntdq XMMWORD [esi+2*SIZEOF_XMMWORD], xmmE + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr0 + jmp short .out10 +.out11: ; --(unaligned)----------------- + xchg edi,esi ; edi=outptr0, esi=outptr1 + maskmovdqu xmmH,xmmG ; movntdqu XMMWORD [edi], xmmH + add edi, byte SIZEOF_XMMWORD ; outptr0 + maskmovdqu xmmB,xmmG ; movntdqu XMMWORD [edi], xmmB + add edi, byte SIZEOF_XMMWORD ; outptr0 + maskmovdqu xmmE,xmmG ; movntdqu XMMWORD [edi], xmmE + add edi, byte SIZEOF_XMMWORD ; outptr0 + xchg edi,esi ; edi=outptr1, esi=outptr0 +.out10: + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add ebx, byte SIZEOF_XMMWORD ; inptr00 + add edx, byte SIZEOF_XMMWORD ; inptr01 + dec ah ; YHctr + jnz near .YHloop_2nd + + push ebx ; inptr00 + push edx ; inptr01 + mov ebx, JSAMPROW [inptr1] ; ebx=inptr1 + mov edx, JSAMPROW [inptr2] ; edx=inptr2 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr1 + maskmovdqu xmmD,xmmG ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr1 + xchg edi,esi ; edi=outptr0, esi=outptr1 + maskmovdqu xmmH,xmmG ; movntdqu XMMWORD [edi], xmmH + add edi, byte SIZEOF_XMMWORD ; outptr0 + maskmovdqu xmmB,xmmG ; movntdqu XMMWORD [edi], xmmB + add edi, byte SIZEOF_XMMWORD ; outptr0 + xchg edi,esi ; edi=outptr1, esi=outptr0 + movdqa xmmA,xmmF + movdqa xmmH,xmmE + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr1 + xchg edi,esi ; 
edi=outptr0, esi=outptr1 + maskmovdqu xmmH,xmmG ; movntdqu XMMWORD [edi], xmmH + add edi, byte SIZEOF_XMMWORD ; outptr0 + xchg edi,esi ; edi=outptr1, esi=outptr0 + movdqa xmmA,xmmD + movdqa xmmH,xmmB + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + mov edx,ecx + xor ecx, byte 0x0F + shl ecx, 2 + movd xmmC,ecx + psrlq xmmG,4 + pcmpeqb xmmD,xmmD + psrlq xmmG,xmmC + psrlq xmmD,xmmC + punpcklbw xmmD,xmmG + movdqa xmmB,xmmD + ; ================ + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0a + lea eax, [ecx+edx] + cmp eax, byte SIZEOF_XMMWORD + ja short .adj0a + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmD,ecx + movdqa xmmF,xmmA + movdqa xmmE,xmmD + pslldq xmmA, SIZEOF_XMMWORD/2 + pslldq xmmD, SIZEOF_XMMWORD/2 + movd xmmC,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1a + movd xmmG,ecx + psllq xmmA,xmmG + psllq xmmD,xmmG + jmp short .adj0a +.adj1a: neg ecx + movd xmmG,ecx + psrlq xmmA,xmmG + psrlq xmmD,xmmG + psllq xmmF,xmmC + psllq xmmE,xmmC + por xmmA,xmmF + por xmmD,xmmE +.adj0a: ; ---------------- + maskmovdqu xmmA,xmmD ; movntdqu XMMWORD [edi], xmmA + xchg edi,esi ; edi=outptr0, esi=outptr1 + ; ================ + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0b + lea eax, [ecx+edx] + cmp eax, byte SIZEOF_XMMWORD + ja short .adj0b + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmH,ecx & pslldq xmmB,ecx + movdqa xmmG,xmmH + movdqa xmmC,xmmB + pslldq xmmH, SIZEOF_XMMWORD/2 + pslldq xmmB, SIZEOF_XMMWORD/2 + movd xmmF,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1b + movd xmmE,ecx + psllq xmmH,xmmE + psllq xmmB,xmmE + jmp short .adj0b +.adj1b: neg ecx + movd xmmE,ecx + psrlq xmmH,xmmE + psrlq xmmB,xmmE + psllq xmmG,xmmF + psllq xmmC,xmmF + por xmmH,xmmG + por xmmB,xmmC +.adj0b: ; ---------------- + maskmovdqu xmmH,xmmB ; movntdqu XMMWORD [edi], xmmH + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef 
RGBX_FILLER_0XFF + pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC,xmmA + punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG,xmmB + punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD,xmmA + punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH,xmmC + punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + dec al ; YVctr + jz short .YVloop_break + + movdqa XMMWORD [wk(6)], xmmA + movdqa XMMWORD [wk(7)], xmmD + movdqa XMMWORD [wk(8)], xmmC + movdqa XMMWORD [wk(9)], xmmH + + jmp near .YVloop_2nd + alignx 16,7 + +.YVloop_break: + movdqa xmmE, XMMWORD [wk(6)] + movdqa xmmF, XMMWORD [wk(7)] + movdqa xmmB, XMMWORD [wk(8)] + + pcmpeqb xmmG,xmmG ; xmmG=(all 1's) + + cmp ecx, byte SIZEOF_XMMWORD + jb near .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz 
short .out01 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr1 + jmp short .out00 +.out01: ; --(unaligned)----------------- + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr1 + maskmovdqu xmmD,xmmG ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr1 + maskmovdqu xmmC,xmmG ; movntdqu XMMWORD [edi], xmmC + add edi, byte SIZEOF_XMMWORD ; outptr1 + maskmovdqu xmmH,xmmG ; movntdqu XMMWORD [edi], xmmH + add edi, byte SIZEOF_XMMWORD ; outptr1 +.out00: + movdqa xmmA, XMMWORD [wk(9)] + + test esi, SIZEOF_XMMWORD-1 + jnz short .out11 + ; --(aligned)------------------- + movntdq XMMWORD [esi+0*SIZEOF_XMMWORD], xmmE + movntdq XMMWORD [esi+1*SIZEOF_XMMWORD], xmmF + movntdq XMMWORD [esi+2*SIZEOF_XMMWORD], xmmB + movntdq XMMWORD [esi+3*SIZEOF_XMMWORD], xmmA + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr0 + jmp short .out10 +.out11: ; --(unaligned)----------------- + xchg edi,esi ; edi=outptr0, esi=outptr1 + maskmovdqu xmmE,xmmG ; movntdqu XMMWORD [edi], xmmE + add edi, byte SIZEOF_XMMWORD ; outptr0 + maskmovdqu xmmF,xmmG ; movntdqu XMMWORD [edi], xmmF + add edi, byte SIZEOF_XMMWORD ; outptr0 + maskmovdqu xmmB,xmmG ; movntdqu XMMWORD [edi], xmmB + add edi, byte SIZEOF_XMMWORD ; outptr0 + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr0 + xchg edi,esi ; edi=outptr1, esi=outptr0 +.out10: + sub ecx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add ebx, byte SIZEOF_XMMWORD ; inptr00 + add edx, byte SIZEOF_XMMWORD ; inptr01 + dec ah ; YHctr + jnz near .YHloop_2nd + + push ebx ; inptr00 + push edx ; inptr01 + mov ebx, JSAMPROW [inptr1] ; ebx=inptr1 + mov edx, JSAMPROW [inptr2] ; edx=inptr2 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte 
SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16,7 + +.column_st32: + cmp ecx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr1 + maskmovdqu xmmD,xmmG ; movntdqu XMMWORD [edi], xmmD + add edi, byte SIZEOF_XMMWORD ; outptr1 + xchg edi,esi ; edi=outptr0, esi=outptr1 + maskmovdqu xmmE,xmmG ; movntdqu XMMWORD [edi], xmmE + add edi, byte SIZEOF_XMMWORD ; outptr0 + maskmovdqu xmmF,xmmG ; movntdqu XMMWORD [edi], xmmF + add edi, byte SIZEOF_XMMWORD ; outptr0 + xchg edi,esi ; edi=outptr1, esi=outptr0 + movdqa xmmA,xmmC + movdqa xmmD,xmmH + movdqa xmmE,xmmB + movdqa xmmF, XMMWORD [wk(9)] + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr1 + xchg edi,esi ; edi=outptr0, esi=outptr1 + maskmovdqu xmmE,xmmG ; movntdqu XMMWORD [edi], xmmE + add edi, byte SIZEOF_XMMWORD ; outptr0 + xchg edi,esi ; edi=outptr1, esi=outptr0 + movdqa xmmA,xmmD + movdqa xmmE,xmmF + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + cmp ecx, byte SIZEOF_XMMWORD/16 + jb near .endcolumn + mov edx,ecx + xor ecx, byte 0x03 + inc ecx + shl ecx, 4 + movd xmmC,ecx + psrlq xmmG,xmmC + punpcklbw xmmG,xmmG + movdqa xmmH,xmmG + ; ================ + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0a + lea eax, [ecx+edx*4] ; RGB_PIXELSIZE + cmp eax, byte SIZEOF_XMMWORD + ja short .adj0a + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmG,ecx + movdqa xmmB,xmmA + movdqa xmmD,xmmG + pslldq xmmA, SIZEOF_XMMWORD/2 + pslldq xmmG, SIZEOF_XMMWORD/2 + movd xmmF,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1a + movd xmmC,ecx + psllq xmmA,xmmC + psllq xmmG,xmmC + jmp short .adj0a +.adj1a: neg ecx + movd xmmC,ecx + psrlq xmmA,xmmC + psrlq xmmG,xmmC + psllq xmmB,xmmF + psllq xmmD,xmmF + por xmmA,xmmB + 
por xmmG,xmmD +.adj0a: ; ---------------- + maskmovdqu xmmA,xmmG ; movntdqu XMMWORD [edi], xmmA + xchg edi,esi ; edi=outptr0, esi=outptr1 + ; ================ + mov ecx,edi + and ecx, byte SIZEOF_XMMWORD-1 + jz short .adj0b + lea eax, [ecx+edx*4] ; RGB_PIXELSIZE + cmp eax, byte SIZEOF_XMMWORD + ja short .adj0b + and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary + shl ecx, 3 ; pslldq xmmE,ecx & pslldq xmmH,ecx + movdqa xmmC,xmmE + movdqa xmmF,xmmH + pslldq xmmE, SIZEOF_XMMWORD/2 + pslldq xmmH, SIZEOF_XMMWORD/2 + movd xmmB,ecx + sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT + jb short .adj1b + movd xmmD,ecx + psllq xmmE,xmmD + psllq xmmH,xmmD + jmp short .adj0b +.adj1b: neg ecx + movd xmmD,ecx + psrlq xmmE,xmmD + psrlq xmmH,xmmD + psllq xmmC,xmmB + psllq xmmF,xmmB + por xmmE,xmmC + por xmmH,xmmF +.adj0b: ; ---------------- + maskmovdqu xmmE,xmmH ; movntdqu XMMWORD [edi], xmmE + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; !USE_DEDICATED_H2V2_MERGED_UPSAMPLE_SSE2 + +%endif ; JDMERGE_SSE2_SUPPORTED +%endif ; UPSAMPLE_MERGING_SUPPORTED +%endif ; RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4 diff --git a/jdphuff.c b/jdphuff.c index 2267809..a1d92b7 100644 --- a/jdphuff.c +++ b/jdphuff.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified to improve performance. 
+ * Last Modified : October 31, 2004 + * --------------------------------------------------------------------- + * * This file contains Huffman entropy decoding routines for progressive JPEG. * * Much of the complexity here has to do with supporting input suspension. @@ -69,6 +76,7 @@ typedef struct { d_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */ + d_derived_tbl * dc_derived_tbls[MAX_COMPS_IN_SCAN]; } phuff_entropy_decoder; typedef phuff_entropy_decoder * phuff_entropy_ptr; @@ -168,6 +176,7 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo) tbl = compptr->dc_tbl_no; jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, & entropy->derived_tbls[tbl]); + entropy->dc_derived_tbls[ci] = entropy->derived_tbls[tbl]; } } else { tbl = compptr->ac_tbl_no; @@ -193,32 +202,6 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo) } -/* - * Figure F.12: extend sign bit. - * On some machines, a shift and add will be faster than a table lookup. - */ - -#ifdef AVOID_TABLES - -#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) - -#else - -#define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) - -static const int extend_test[16] = /* entry n is 2**(n-1) */ - { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, - 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; - -static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ - { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, - ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, - ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, - ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; - -#endif /* AVOID_TABLES */ - - /* * Check for a restart marker & resynchronize decoder. * Returns FALSE if must suspend. 
@@ -284,16 +267,12 @@ process_restart (j_decompress_ptr cinfo) METHODDEF(boolean) decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Al = cinfo->Al; - register int s, r; - int blkn, ci; - JBLOCKROW block; + int blkn; BITREAD_STATE_VARS; savable_state state; - d_derived_tbl * tbl; - jpeg_component_info * compptr; /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { @@ -314,21 +293,67 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Outer loop handles each block in the MCU */ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - tbl = entropy->derived_tbls[compptr->dc_tbl_no]; + JBLOCKROW block = MCU_data[blkn]; + int ci = cinfo->MCU_membership[blkn]; + d_derived_tbl * tbl = entropy->dc_derived_tbls[ci]; + register int s; /* Decode a single block's worth of coefficients */ /* Section F.2.2.1: decode the DC coefficient difference */ - HUFF_DECODE(s, br_state, tbl, return FALSE, label1); - if (s) { - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); + { /* HUFFX_DECODE */ + register int nb, look, t; + if (bits_left < HUFFX_LOOKAHEAD) { + register const JOCTET * next_input_byte = br_state.next_input_byte; + register size_t bytes_in_buffer = br_state.bytes_in_buffer; + if (cinfo->unread_marker == 0) { + while (bits_left < MIN_GET_BITS) { + register int c; + if (bytes_in_buffer == 0 || + (c = GETJOCTET(*next_input_byte)) == 0xFF) { + goto label11; } + bytes_in_buffer--; next_input_byte++; + get_buffer = (get_buffer << 8) | c; + bits_left += 8; + } + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + } else { + label11: + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + if (! 
jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) { + return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + if (bits_left < HUFFX_LOOKAHEAD) { + nb = 1; goto label1; + } + } + } + look = PEEK_BITS(HUFFX_LOOKAHEAD); + if ((nb = tbl->lookx_nbits[look]) != 0) { + s = tbl->lookx_val[look]; + if (nb <= HUFFX_LOOKAHEAD) { + DROP_BITS(nb); + } else { + DROP_BITS(HUFFX_LOOKAHEAD); + nb -= HUFFX_LOOKAHEAD; + CHECK_BIT_BUFFER(br_state, nb, return FALSE); + s += GET_BITS(nb); + } + } else { + nb = HUFFX_LOOKAHEAD; + label1: + if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,tbl,nb)) + < 0) { return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + t = GET_BITS(s); + s = HUFF_EXTEND(t, s); + } + } } - /* Convert DC difference to actual value, update last_dc_val */ s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; @@ -355,15 +380,12 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) METHODDEF(boolean) decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Se = cinfo->Se; int Al = cinfo->Al; - register int s, k, r; unsigned int EOBRUN; - JBLOCKROW block; BITREAD_STATE_VARS; - d_derived_tbl * tbl; /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { @@ -384,22 +406,74 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* There is always only one block per MCU */ - if (EOBRUN > 0) /* if it's a band of zeroes... */ + if (EOBRUN > 0) { /* if it's a band of zeroes... 
*/ EOBRUN--; /* ...process it now (we do nothing) */ - else { + } else { + JBLOCKROW block = MCU_data[0]; + d_derived_tbl * tbl = entropy->ac_derived_tbl; + register int s, k, r; + + /* Load up working state */ BITREAD_LOAD_STATE(cinfo,entropy->bitstate); - block = MCU_data[0]; - tbl = entropy->ac_derived_tbl; for (k = cinfo->Ss; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, return FALSE, label2); - r = s >> 4; - s &= 15; + { /* HUFFX_DECODE */ + register int nb, look, t; + if (bits_left < HUFFX_LOOKAHEAD) { + register const JOCTET * next_input_byte = br_state.next_input_byte; + register size_t bytes_in_buffer = br_state.bytes_in_buffer; + if (cinfo->unread_marker == 0) { + while (bits_left < MIN_GET_BITS) { + register int c; + if (bytes_in_buffer == 0 || + (c = GETJOCTET(*next_input_byte)) == 0xFF) { + goto label21; } + bytes_in_buffer--; next_input_byte++; + get_buffer = (get_buffer << 8) | c; + bits_left += 8; + } + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + } else { + label21: + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + if (! 
jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) { + return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + if (bits_left < HUFFX_LOOKAHEAD) { + nb = 1; goto label2; + } + } + } + look = PEEK_BITS(HUFFX_LOOKAHEAD); + if ((nb = tbl->lookx_nbits[look]) != 0) { + s = tbl->lookx_val[look]; + r = tbl->lookx_sym[look] >> 4; + if (nb <= HUFFX_LOOKAHEAD) { + DROP_BITS(nb); + } else { + DROP_BITS(HUFFX_LOOKAHEAD); + nb -= HUFFX_LOOKAHEAD; + CHECK_BIT_BUFFER(br_state, nb, return FALSE); + s += GET_BITS(nb); + } + } else { + nb = HUFFX_LOOKAHEAD; + label2: + if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,tbl,nb)) + < 0) { return FALSE; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + r = s >> 4; s &= 15; + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + t = GET_BITS(s); + s = HUFF_EXTEND(t, s); + } + } + } if (s) { k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); /* Scale and output coefficient in natural (dezigzagged) order */ (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al); } else { @@ -440,11 +514,10 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) METHODDEF(boolean) decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ int blkn; - JBLOCKROW block; BITREAD_STATE_VARS; /* Process restart marker if needed; may have to suspend */ @@ -464,7 +537,7 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Outer loop handles each block in the MCU */ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; + JBLOCKROW block = MCU_data[blkn]; /* Encoded data is simply the next bit of the two's-complement DC value */ CHECK_BIT_BUFFER(br_state, 1, return FALSE); @@ -489,17 +562,17 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) 
METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ +{ phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; int Se = cinfo->Se; - int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ - int m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + int Al = cinfo->Al; register int s, k, r; unsigned int EOBRUN; JBLOCKROW block; JCOEFPTR thiscoef; BITREAD_STATE_VARS; d_derived_tbl * tbl; + int pm1[2]; int num_newnz; int newnz_pos[DCTSIZE2]; @@ -522,6 +595,13 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) block = MCU_data[0]; tbl = entropy->ac_derived_tbl; + /* The pm1[] array is indexed by a value from relational operator. + * This method eliminates conditional branches depending on random data, + * which result in lower performance on recent processors. + */ + pm1[0] = 1 << cinfo->Al; /* +1 in the bit position being coded */ + pm1[1] = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + /* If we are forced to suspend, we must undo the assignments to any newly * nonzero coefficients in the block, because otherwise we'd get confused * next time about which coefficients were already nonzero. 
@@ -535,18 +615,63 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (EOBRUN == 0) { for (; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, goto undoit, label3); - r = s >> 4; - s &= 15; - if (s) { - if (s != 1) /* size of new coef should always be 1 */ - WARNMS(cinfo, JWRN_HUFF_BAD_CODE); - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) - s = p1; /* newly nonzero coef is positive */ - else - s = m1; /* newly nonzero coef is negative */ - } else { + { /* HUFFX_DECODE */ + register int nb, look, t; + if (bits_left < HUFFX_LOOKAHEAD) { + register const JOCTET * next_input_byte = br_state.next_input_byte; + register size_t bytes_in_buffer = br_state.bytes_in_buffer; + if (cinfo->unread_marker == 0) { + while (bits_left < MIN_GET_BITS) { + register int c; + if (bytes_in_buffer == 0 || + (c = GETJOCTET(*next_input_byte)) == 0xFF) { + goto label31; } + bytes_in_buffer--; next_input_byte++; + get_buffer = (get_buffer << 8) | c; + bits_left += 8; + } + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + } else { + label31: + br_state.next_input_byte = next_input_byte; + br_state.bytes_in_buffer = bytes_in_buffer; + if (! jpeg_fill_bit_buffer(&br_state,get_buffer,bits_left, 0)) { + goto undoit; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + if (bits_left < HUFFX_LOOKAHEAD) { + nb = 1; goto label3; + } + } + } + look = PEEK_BITS(HUFFX_LOOKAHEAD); + if ((nb = tbl->lookx_nbits[look]) != 0) { + t = tbl->lookx_sym[look]; + s = tbl->lookx_val[look]; + r = t >> 4; t &= 15; + if (t <= 1) { + DROP_BITS(nb); + } else { /* size of new coef should always be 1 */ + WARNMS(cinfo, JWRN_HUFF_BAD_CODE); + DROP_BITS(nb - (t - 1)); + s = (s >= 0) ? 
1 : -1; + } + } else { + nb = HUFFX_LOOKAHEAD; + label3: + if ((s=jpeg_huff_decode(&br_state,get_buffer,bits_left,tbl,nb)) + < 0) { goto undoit; } + get_buffer = br_state.get_buffer; bits_left = br_state.bits_left; + r = s >> 4; s &= 15; + if (s) { + if (s != 1) /* size of new coef should always be 1 */ + WARNMS(cinfo, JWRN_HUFF_BAD_CODE); + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + s = GET_BITS(1) ? 1 : -1; + } + } + } + if (s == 0) { if (r != 15) { EOBRUN = 1 << r; /* EOBr, run length is 2^r + appended bits */ if (r) { @@ -567,12 +692,8 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (*thiscoef != 0) { CHECK_BIT_BUFFER(br_state, 1, goto undoit); if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already set it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } + if ((*thiscoef & pm1[0]) == 0) /* do nothing if already set it */ + *thiscoef += pm1[(*thiscoef < 0)]; } } else { if (--r < 0) @@ -583,7 +704,7 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (s) { int pos = jpeg_natural_order[k]; /* Output newly nonzero coefficient */ - (*block)[pos] = (JCOEF) s; + (*block)[pos] = (JCOEF) (s << Al); /* Remember its position in case we have to suspend */ newnz_pos[num_newnz++] = pos; } @@ -601,12 +722,8 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (*thiscoef != 0) { CHECK_BIT_BUFFER(br_state, 1, goto undoit); if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } + if ((*thiscoef & pm1[0]) == 0) /* do nothing if already set it */ + *thiscoef += pm1[(*thiscoef < 0)]; } } } diff --git a/jdsammmx.asm b/jdsammmx.asm new file mode 100644 index 0000000..bb17d37 --- /dev/null +++ b/jdsammmx.asm @@ -0,0 +1,893 @@ +; +; jdsammmx.asm - upsampling (MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jcolsamp.inc" + +%ifdef JDSAMPLE_FANCY_MMX_SUPPORTED + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_mmx) + +EXTN(jconst_fancy_upsample_mmx): + +PW_ONE times 4 dw 1 +PW_TWO times 4 dw 2 +PW_THREE times 4 dw 3 +PW_SEVEN times 4 dw 7 +PW_EIGHT times 4 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. 
+; +; GLOBAL(void) +; jpeg_h2v1_fancy_upsample_mmx (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + + align 16 + global EXTN(jpeg_h2v1_fancy_upsample_mmx) + +EXTN(jpeg_h2v1_fancy_upsample_mmx): + push ebp + mov ebp,esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, POINTER [compptr(ebp)] + mov eax, JDIMENSION [jcompinfo_downsampled_width(eax)] ; colctr + test eax,eax + jz near .return + + mov ecx, POINTER [cinfo(ebp)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_MMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor mm0,mm0 ; mm0=(all 0's) + pcmpeqb mm7,mm7 + psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT + pand mm7, MMWORD [esi+0*SIZEOF_MMWORD] + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + pcmpeqb mm6,mm6 + psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT + pand mm6, MMWORD [esi+0*SIZEOF_MMWORD] + jmp short .upsample + alignx 16,7 + +.columnloop: + movq mm6, MMWORD [esi+1*SIZEOF_MMWORD] + psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT + +.upsample: + movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm2,mm1 + movq mm3,mm1 ; mm1=( 0 1 2 3 4 5 6 7) + 
psllq mm2,BYTE_BIT ; mm2=( - 0 1 2 3 4 5 6) + psrlq mm3,BYTE_BIT ; mm3=( 1 2 3 4 5 6 7 -) + + por mm2,mm7 ; mm2=(-1 0 1 2 3 4 5 6) + por mm3,mm6 ; mm3=( 1 2 3 4 5 6 7 8) + + movq mm7,mm1 + psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT ; mm7=( 7 - - - - - - -) + + movq mm4,mm1 + punpcklbw mm1,mm0 ; mm1=( 0 1 2 3) + punpckhbw mm4,mm0 ; mm4=( 4 5 6 7) + movq mm5,mm2 + punpcklbw mm2,mm0 ; mm2=(-1 0 1 2) + punpckhbw mm5,mm0 ; mm5=( 3 4 5 6) + movq mm6,mm3 + punpcklbw mm3,mm0 ; mm3=( 1 2 3 4) + punpckhbw mm6,mm0 ; mm6=( 5 6 7 8) + + pmullw mm1,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + paddw mm2,[GOTOFF(ebx,PW_ONE)] + paddw mm5,[GOTOFF(ebx,PW_ONE)] + paddw mm3,[GOTOFF(ebx,PW_TWO)] + paddw mm6,[GOTOFF(ebx,PW_TWO)] + + paddw mm2,mm1 + paddw mm5,mm4 + psrlw mm2,2 ; mm2=OutLE=( 0 2 4 6) + psrlw mm5,2 ; mm5=OutHE=( 8 10 12 14) + paddw mm3,mm1 + paddw mm6,mm4 + psrlw mm3,2 ; mm3=OutLO=( 1 3 5 7) + psrlw mm6,2 ; mm6=OutHO=( 9 11 13 15) + + psllw mm3,BYTE_BIT + psllw mm6,BYTE_BIT + por mm2,mm3 ; mm2=OutL=( 0 1 2 3 4 5 6 7) + por mm5,mm6 ; mm5=OutH=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm5 + + sub eax, byte SIZEOF_MMWORD + add esi, byte 1*SIZEOF_MMWORD ; inptr + add edi, byte 2*SIZEOF_MMWORD ; outptr + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. 
+; +; GLOBAL(void) +; jpeg_h2v2_fancy_upsample_mmx (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h2v2_fancy_upsample_mmx) + +EXTN(jpeg_h2v2_fancy_upsample_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx,eax ; edx = original ebp + mov eax, POINTER [compptr(edx)] + mov eax, JDIMENSION [jcompinfo_downsampled_width(eax)] ; colctr + test eax,eax + jz near .return + + mov ecx, POINTER [cinfo(edx)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_MMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE 
[ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop edx +.skip: + ; -- process the first column block + + movq mm0, MMWORD [ebx+0*SIZEOF_MMWORD] ; mm0=row[ 0][0] + movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0] + movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3,mm3 ; mm3=(all 0's) + movq mm4,mm0 + punpcklbw mm0,mm3 ; mm0=row[ 0][0]( 0 1 2 3) + punpckhbw mm4,mm3 ; mm4=row[ 0][0]( 4 5 6 7) + movq mm5,mm1 + punpcklbw mm1,mm3 ; mm1=row[-1][0]( 0 1 2 3) + punpckhbw mm5,mm3 ; mm5=row[-1][0]( 4 5 6 7) + movq mm6,mm2 + punpcklbw mm2,mm3 ; mm2=row[+1][0]( 0 1 2 3) + punpckhbw mm6,mm3 ; mm6=row[+1][0]( 4 5 6 7) + + pmullw mm0,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + + pcmpeqb mm7,mm7 + psrlq mm7,(SIZEOF_MMWORD-2)*BYTE_BIT + + paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm6 + + pand mm1,mm7 ; mm1=( 0 - - -) + pand mm2,mm7 ; mm2=( 0 - - -) + + movq MMWORD [wk(0)], mm1 + movq MMWORD [wk(1)], mm2 + + poppic ebx + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb mm1,mm1 + psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT + movq mm2,mm1 + + pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7) + pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7) + + movq MMWORD 
[wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + + jmp short .upsample + alignx 16,7 + +.columnloop: + ; -- process the next column block + + movq mm0, MMWORD [ebx+1*SIZEOF_MMWORD] ; mm0=row[ 0][1] + movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1] + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3,mm3 ; mm3=(all 0's) + movq mm4,mm0 + punpcklbw mm0,mm3 ; mm0=row[ 0][1]( 0 1 2 3) + punpckhbw mm4,mm3 ; mm4=row[ 0][1]( 4 5 6 7) + movq mm5,mm1 + punpcklbw mm1,mm3 ; mm1=row[-1][1]( 0 1 2 3) + punpckhbw mm5,mm3 ; mm5=row[-1][1]( 4 5 6 7) + movq mm6,mm2 + punpcklbw mm2,mm3 ; mm2=row[+1][1]( 0 1 2 3) + punpckhbw mm6,mm3 ; mm6=row[+1][1]( 4 5 6 7) + + pmullw mm0,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + + paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD [edx+2*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+3*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm6 + + psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm1=( - - - 0) + psllq mm2,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm2=( - - - 0) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + +.upsample: + ; -- process the upper row + + movq mm7, MMWORD [edx+0*SIZEOF_MMWORD] ; mm7=Int0L=( 0 1 2 3) + movq mm3, MMWORD [edx+1*SIZEOF_MMWORD] ; mm3=Int0H=( 4 5 6 7) + + movq mm0,mm7 + movq mm4,mm3 + psrlq mm0,2*BYTE_BIT ; mm0=( 1 2 3 -) + psllq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( - - - 4) + movq mm5,mm7 + movq mm6,mm3 + psrlq mm5,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm5=( 3 - - -) + psllq mm6,2*BYTE_BIT ; mm6=( - 4 5 6) + + por mm0,mm4 ; mm0=( 1 2 3 4) + por mm5,mm6 ; mm5=( 3 4 5 6) + + movq mm1,mm7 + movq mm2,mm3 + psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm2,2*BYTE_BIT ; mm2=( 5 6 7 -) + movq mm4,mm3 + psrlq 
mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( 7 - - -) + + por mm1, MMWORD [wk(0)] ; mm1=(-1 0 1 2) + por mm2, MMWORD [wk(2)] ; mm2=( 5 6 7 8) + + movq MMWORD [wk(0)], mm4 + + pmullw mm7,[GOTOFF(ebx,PW_THREE)] + pmullw mm3,[GOTOFF(ebx,PW_THREE)] + paddw mm1,[GOTOFF(ebx,PW_EIGHT)] + paddw mm5,[GOTOFF(ebx,PW_EIGHT)] + paddw mm0,[GOTOFF(ebx,PW_SEVEN)] + paddw mm2,[GOTOFF(ebx,PW_SEVEN)] + + paddw mm1,mm7 + paddw mm5,mm3 + psrlw mm1,4 ; mm1=Out0LE=( 0 2 4 6) + psrlw mm5,4 ; mm5=Out0HE=( 8 10 12 14) + paddw mm0,mm7 + paddw mm2,mm3 + psrlw mm0,4 ; mm0=Out0LO=( 1 3 5 7) + psrlw mm2,4 ; mm2=Out0HO=( 9 11 13 15) + + psllw mm0,BYTE_BIT + psllw mm2,BYTE_BIT + por mm1,mm0 ; mm1=Out0L=( 0 1 2 3 4 5 6 7) + por mm5,mm2 ; mm5=Out0H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 + + ; -- process the lower row + + movq mm6, MMWORD [edi+0*SIZEOF_MMWORD] ; mm6=Int1L=( 0 1 2 3) + movq mm4, MMWORD [edi+1*SIZEOF_MMWORD] ; mm4=Int1H=( 4 5 6 7) + + movq mm7,mm6 + movq mm3,mm4 + psrlq mm7,2*BYTE_BIT ; mm7=( 1 2 3 -) + psllq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( - - - 4) + movq mm0,mm6 + movq mm2,mm4 + psrlq mm0,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm0=( 3 - - -) + psllq mm2,2*BYTE_BIT ; mm2=( - 4 5 6) + + por mm7,mm3 ; mm7=( 1 2 3 4) + por mm0,mm2 ; mm0=( 3 4 5 6) + + movq mm1,mm6 + movq mm5,mm4 + psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm5,2*BYTE_BIT ; mm5=( 5 6 7 -) + movq mm3,mm4 + psrlq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( 7 - - -) + + por mm1, MMWORD [wk(1)] ; mm1=(-1 0 1 2) + por mm5, MMWORD [wk(3)] ; mm5=( 5 6 7 8) + + movq MMWORD [wk(1)], mm3 + + pmullw mm6,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + paddw mm1,[GOTOFF(ebx,PW_EIGHT)] + paddw mm0,[GOTOFF(ebx,PW_EIGHT)] + paddw mm7,[GOTOFF(ebx,PW_SEVEN)] + paddw mm5,[GOTOFF(ebx,PW_SEVEN)] + + paddw mm1,mm6 + paddw mm0,mm4 + psrlw mm1,4 ; mm1=Out1LE=( 0 2 4 6) + psrlw mm0,4 ; mm0=Out1HE=( 8 10 12 14) + paddw mm7,mm6 + paddw mm5,mm4 + psrlw mm7,4 ; mm7=Out1LO=( 1 3 5 
7) + psrlw mm5,4 ; mm5=Out1HO=( 9 11 13 15) + + psllw mm7,BYTE_BIT + psllw mm5,BYTE_BIT + por mm1,mm7 ; mm1=Out1L=( 0 1 2 3 4 5 6 7) + por mm0,mm5 ; mm0=Out1H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm0 + + poppic ebx + + sub eax, byte SIZEOF_MMWORD + add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_MMWORD ; inptr0 + add esi, byte 1*SIZEOF_MMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_MMWORD ; outptr0 + add edi, byte 2*SIZEOF_MMWORD ; outptr1 + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%ifdef UPSAMPLE_H1V2_SUPPORTED + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 1:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. 
+; +; GLOBAL(void) +; jpeg_h1v2_fancy_upsample_mmx (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + +%define gotptr ebp-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h1v2_fancy_upsample_mmx) + +EXTN(jpeg_h1v2_fancy_upsample_mmx): + push ebp + mov ebp,esp + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov eax, POINTER [compptr(ebp)] + mov eax, JDIMENSION [jcompinfo_downsampled_width(eax)] ; colctr + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + jz near .return + + mov ecx, POINTER [cinfo(ebp)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + pxor mm0,mm0 ; mm0=(all 0's) + alignx 16,7 + +.columnloop: + movq mm1, MMWORD [ebx] ; mm1=row[ 0]( 0 1 2 3 4 5 6 7) + movq mm2, MMWORD [ecx] ; mm2=row[-1]( 0 1 2 3 4 5 6 7) + movq mm3, MMWORD [esi] ; mm3=row[+1]( 0 1 2 3 4 5 6 7) + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + movq mm4,mm1 + punpcklbw mm1,mm0 ; mm1=row[ 0]( 0 1 2 3) + punpckhbw mm4,mm0 
; mm4=row[ 0]( 4 5 6 7) + movq mm5,mm2 + punpcklbw mm2,mm0 ; mm2=row[-1]( 0 1 2 3) + punpckhbw mm5,mm0 ; mm5=row[-1]( 4 5 6 7) + movq mm6,mm3 + punpcklbw mm3,mm0 ; mm3=row[+1]( 0 1 2 3) + punpckhbw mm6,mm0 ; mm6=row[+1]( 4 5 6 7) + + pmullw mm1,[GOTOFF(ebx,PW_THREE)] + pmullw mm4,[GOTOFF(ebx,PW_THREE)] + paddw mm2,[GOTOFF(ebx,PW_ONE)] + paddw mm5,[GOTOFF(ebx,PW_ONE)] + paddw mm3,[GOTOFF(ebx,PW_TWO)] + paddw mm6,[GOTOFF(ebx,PW_TWO)] + + paddw mm2,mm1 + paddw mm5,mm4 + psrlw mm2,2 ; mm2=Out0L=( 0 1 2 3) + psrlw mm5,2 ; mm5=Out0H=( 4 5 6 7) + paddw mm3,mm1 + paddw mm6,mm4 + psrlw mm3,2 ; mm3=Out1L=( 0 1 2 3) + psrlw mm6,2 ; mm6=Out1H=( 4 5 6 7) + + packuswb mm2,mm5 ; mm2=Out0=( 0 1 2 3 4 5 6 7) + packuswb mm3,mm6 ; mm3=Out1=( 0 1 2 3 4 5 6 7) + + movq MMWORD [edx], mm2 + movq MMWORD [edi], mm3 + + poppic ebx + + add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_MMWORD ; inptr0 + add esi, byte 1*SIZEOF_MMWORD ; inptr1(below) + add edx, byte 1*SIZEOF_MMWORD ; outptr0 + add edi, byte 1*SIZEOF_MMWORD ; outptr1 + sub eax, byte SIZEOF_MMWORD + jnz near .columnloop + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + poppic eax ; remove gotptr + pop ebp + ret + +%endif ; UPSAMPLE_H1V2_SUPPORTED +%endif ; JDSAMPLE_FANCY_MMX_SUPPORTED + +%ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED + +%ifndef JDSAMPLE_FANCY_MMX_SUPPORTED +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +%endif +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. 
+; Every input sample is stored twice in a row (punpcklbw/punpckhbw of a register with itself). +; GLOBAL(void) +; jpeg_h2v1_upsample_mmx (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + + align 16 + global EXTN(jpeg_h2v1_upsample_mmx) + +EXTN(jpeg_h2v1_upsample_mmx): + push ebp + mov ebp,esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, POINTER [cinfo(ebp)] + mov edx, JDIMENSION [jdstruct_output_width(edx)] + add edx, byte (2*SIZEOF_MMWORD)-1 + and edx, byte -(2*SIZEOF_MMWORD) + jz short .return + + mov ecx, POINTER [cinfo(ebp)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax,edx ; colctr (output_width rounded up to a multiple of 2*SIZEOF_MMWORD) + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1,mm0 + punpcklbw mm0,mm0 ; mm0=input bytes 0-3, each doubled + punpckhbw mm1,mm1 ; mm1=input bytes 4-7, each doubled + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm3,mm2 + punpcklbw mm2,mm2 + punpckhbw mm3,mm3 + + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add edi, byte 4*SIZEOF_MMWORD ; outptr + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg short .rowloop + +
emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; Each expanded row is stored to two output rows (outptr0 and outptr1). +; GLOBAL(void) +; jpeg_h2v2_upsample_mmx (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + + align 16 + global EXTN(jpeg_h2v2_upsample_mmx) + +EXTN(jpeg_h2v2_upsample_mmx): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, POINTER [cinfo(ebp)] + mov edx, JDIMENSION [jdstruct_output_width(edx)] + add edx, byte (2*SIZEOF_MMWORD)-1 + and edx, byte -(2*SIZEOF_MMWORD) + jz near .return + + mov ecx, POINTER [cinfo(ebp)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax,edx ; colctr (output_width rounded up to a multiple of 2*SIZEOF_MMWORD) + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1,mm0 + punpcklbw mm0,mm0 + punpckhbw mm1,mm1 + + movq MMWORD [ebx+0*SIZEOF_MMWORD], mm0 + movq MMWORD [ebx+1*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD
[esi+1*SIZEOF_MMWORD] + + movq mm3,mm2 + punpcklbw mm2,mm2 + punpckhbw mm3,mm3 + + movq MMWORD [ebx+2*SIZEOF_MMWORD], mm2 + movq MMWORD [ebx+3*SIZEOF_MMWORD], mm3 + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add ebx, byte 4*SIZEOF_MMWORD ; outptr0 + add edi, byte 4*SIZEOF_MMWORD ; outptr1 + jmp short .columnloop + alignx 16,7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr (two output rows per pass) + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +%endif ; JDSAMPLE_SIMPLE_MMX_SUPPORTED diff --git a/jdsample.c b/jdsample.c index 80ffefb..37a6cee 100644 --- a/jdsample.c +++ b/jdsample.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : January 5, 2006 + * --------------------------------------------------------------------- + * * This file contains upsampling routines. * * Upsampling input data is counted in "row groups". A row group @@ -21,6 +28,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jcolsamp.h" /* Private declarations */ /* Pointer to routine to upsample a single component */ @@ -285,6 +293,37 @@ h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, } + +#ifdef UPSAMPLE_H1V2_SUPPORTED + +/* + * Fast processing for the common case of 1:1 horizontal and 2:1 vertical. + * It's still a box filter.
+ * Each input row is passed to jcopy_sample_rows() for output rows outrow and outrow+1. + * SIMD Ext: This routine is for files that are rotated or transposed + * by jpegtran. + */ + +METHODDEF(void) +h1v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + int inrow, outrow; + + inrow = outrow = 0; + while (outrow < cinfo->max_v_samp_factor) { + jcopy_sample_rows(input_data, inrow, output_data, outrow, + 1, cinfo->output_width); + jcopy_sample_rows(input_data, inrow, output_data, outrow+1, + 1, cinfo->output_width); + inrow++; /* one input row consumed ... */ + outrow += 2; /* ... for every two output rows */ + } +} + +#endif /* UPSAMPLE_H1V2_SUPPORTED */ + + /* * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. * @@ -391,6 +430,52 @@ h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, } + +#ifdef UPSAMPLE_H1V2_SUPPORTED + +/* + * Fancy processing for the common case of 1:1 horizontal and 2:1 vertical. + * Again a triangle filter; see comments for h2v1 case, above. + * + * It is OK for us to reference the adjacent input rows because we demanded + * context from the main buffer controller (see initialization code). + * + * SIMD Ext: This routine is for files that are rotated or transposed + * by jpegtran.
+ */ + +METHODDEF(void) +h1v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + register JSAMPROW inptr0, inptr1, outptr; + register int colsum; /* 3 * nearest sample + next-nearest sample, before rounding */ + register JDIMENSION colctr; + int inrow, outrow, v; /* v: 0 = upper output row, 1 = lower output row */ + + inrow = outrow = 0; + while (outrow < cinfo->max_v_samp_factor) { + for (v = 0; v < 2; v++) { + /* inptr0 points to nearest input row, inptr1 points to next nearest */ + inptr0 = input_data[inrow]; + if (v == 0) /* next nearest is row above */ + inptr1 = input_data[inrow-1]; + else /* next nearest is row below */ + inptr1 = input_data[inrow+1]; + outptr = output_data[outrow++]; + + for (colctr = compptr->downsampled_width; colctr > 0; colctr--) { + colsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + *outptr++ = (JSAMPLE) ((colsum + v + 1) >> 2); /* bias is 1 for v==0, 2 for v==1; then divide by 4 */ + } + } + inrow++; + } +} + +#endif /* UPSAMPLE_H1V2_SUPPORTED */ + + /* * Module initialization routine for upsampling.
*/ @@ -403,6 +488,7 @@ jinit_upsampler (j_decompress_ptr cinfo) jpeg_component_info * compptr; boolean need_buffer, do_fancy; int h_in_group, v_in_group, h_out_group, v_out_group; + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); /* JSIMD_* flag bits, tested below */ upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -447,18 +533,83 @@ jinit_upsampler (j_decompress_ptr cinfo) } else if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) { /* Special cases for 2h1v upsampling */ - if (do_fancy && compptr->downsampled_width > 2) - upsample->methods[ci] = h2v1_fancy_upsample; - else - upsample->methods[ci] = h2v1_upsample; + if (do_fancy && compptr->downsampled_width > 2) { +#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2)) + upsample->methods[ci] = jpeg_h2v1_fancy_upsample_sse2; + else +#endif +#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED + if (simd & JSIMD_MMX) + upsample->methods[ci] = jpeg_h2v1_fancy_upsample_mmx; + else +#endif + upsample->methods[ci] = h2v1_fancy_upsample; + } else { +#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED + if (simd & JSIMD_SSE2) + upsample->methods[ci] = jpeg_h2v1_upsample_sse2; + else +#endif +#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED + if (simd & JSIMD_MMX) + upsample->methods[ci] = jpeg_h2v1_upsample_mmx; + else +#endif + upsample->methods[ci] = h2v1_upsample; + } } else if (h_in_group * 2 == h_out_group && v_in_group * 2 == v_out_group) { /* Special cases for 2h2v upsampling */ if (do_fancy && compptr->downsampled_width > 2) { - upsample->methods[ci] = h2v2_fancy_upsample; +#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2)) + upsample->methods[ci] = jpeg_h2v2_fancy_upsample_sse2; + else +#endif +#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED + if (simd & JSIMD_MMX) + upsample->methods[ci] = jpeg_h2v2_fancy_upsample_mmx; + else +#endif + upsample->methods[ci] = h2v2_fancy_upsample; +
upsample->pub.need_context_rows = TRUE; + } else { +#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED + if (simd & JSIMD_SSE2) + upsample->methods[ci] = jpeg_h2v2_upsample_sse2; + else +#endif +#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED + if (simd & JSIMD_MMX) + upsample->methods[ci] = jpeg_h2v2_upsample_mmx; + else +#endif + upsample->methods[ci] = h2v2_upsample; + } +#ifdef UPSAMPLE_H1V2_SUPPORTED + } else if (h_in_group == h_out_group && + v_in_group * 2 == v_out_group) { + /* Special cases for 1h2v upsampling */ + if (do_fancy) { +#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2)) + upsample->methods[ci] = jpeg_h1v2_fancy_upsample_sse2; + else +#endif +#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED + if (simd & JSIMD_MMX) + upsample->methods[ci] = jpeg_h1v2_fancy_upsample_mmx; + else +#endif + upsample->methods[ci] = h1v2_fancy_upsample; upsample->pub.need_context_rows = TRUE; } else - upsample->methods[ci] = h2v2_upsample; + upsample->methods[ci] = h1v2_upsample; +#endif /* UPSAMPLE_H1V2_SUPPORTED */ } else if ((h_out_group % h_in_group) == 0 && (v_out_group % v_in_group) == 0) { /* Generic integral-factors upsampling method */ @@ -468,11 +619,52 @@ jinit_upsampler (j_decompress_ptr cinfo) } else ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL); if (need_buffer) { + enum { SIZEOF_XMMWORD = 16 }; /* from jsimdext.inc */ upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) jround_up((long) cinfo->output_width, - (long) cinfo->max_h_samp_factor), + (JDIMENSION) jround_up(jround_up((long) cinfo->output_width, + (long) cinfo->max_h_samp_factor), + (long) (2 * SIZEOF_XMMWORD)), (JDIMENSION) cinfo->max_v_samp_factor); } } } + + +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED + +GLOBAL(unsigned int) +jpeg_simd_upsampler (j_decompress_ptr cinfo, int do_fancy) +{ + unsigned int simd = jpeg_simd_support((j_common_ptr) cinfo); /* JSIMD_* flag bits */ + +#ifdef UPSAMPLE_MERGING_SUPPORTED + if (!do_fancy) + return
jpeg_simd_merged_upsampler(cinfo); +#endif + + if (do_fancy) { +#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED + if (simd & JSIMD_SSE2 && + IS_CONST_ALIGNED_16(jconst_fancy_upsample_sse2)) + return JSIMD_SSE2; +#endif +#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif + } else { +#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED + if (simd & JSIMD_SSE2) + return JSIMD_SSE2; +#endif +#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED + if (simd & JSIMD_MMX) + return JSIMD_MMX; +#endif + } + + return JSIMD_NONE; /* none of the compiled-in SIMD cases above matched */ +} + +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ diff --git a/jdsamss2.asm b/jdsamss2.asm new file mode 100644 index 0000000..46fcf51 --- /dev/null +++ b/jdsamss2.asm @@ -0,0 +1,883 @@ +; +; jdsamss2.asm - upsampling (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler); it +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jcolsamp.inc" + +%ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fancy_upsample_sse2) + +EXTN(jconst_fancy_upsample_sse2): + +PW_ONE times 8 dw 1 +PW_TWO times 8 dw 2 +PW_THREE times 8 dw 3 +PW_SEVEN times 8 dw 7 +PW_EIGHT times 8 dw 8 + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter".
This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jpeg_h2v1_fancy_upsample_sse2 (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + + align 16 + global EXTN(jpeg_h2v1_fancy_upsample_sse2) + +EXTN(jpeg_h2v1_fancy_upsample_sse2): + push ebp + mov ebp,esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, POINTER [compptr(ebp)] + mov eax, JDIMENSION [jcompinfo_downsampled_width(eax)] ; colctr + test eax,eax + jz near .return + + mov ecx, POINTER [cinfo(ebp)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_XMMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample (copy of the last one) +.skip: + pxor xmm0,xmm0 ; xmm0=(all 0's) + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-1) + pand xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD] + + add eax, byte SIZEOF_XMMWORD-1 + and eax, byte -SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + pcmpeqb xmm6,xmm6 + pslldq xmm6,(SIZEOF_XMMWORD-1) + pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD] + jmp short .upsample + alignx 16,7 + +.columnloop: + movdqa xmm6, XMMWORD
[esi+1*SIZEOF_XMMWORD] + pslldq xmm6,(SIZEOF_XMMWORD-1) + +.upsample: + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm2,xmm1 + movdqa xmm3,xmm1 ; xmm1=( 0 1 2 ... 13 14 15) + pslldq xmm2,1 ; xmm2=(-- 0 1 ... 12 13 14) + psrldq xmm3,1 ; xmm3=( 1 2 3 ... 14 15 --) + + por xmm2,xmm7 ; xmm2=(-1 0 1 ... 12 13 14) + por xmm3,xmm6 ; xmm3=( 1 2 3 ... 14 15 16) + + movdqa xmm7,xmm1 + psrldq xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --) + + movdqa xmm4,xmm1 + punpcklbw xmm1,xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm2 + punpcklbw xmm2,xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) + punpckhbw xmm5,xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) + movdqa xmm6,xmm3 + punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) + punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) + + pmullw xmm1,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + paddw xmm2,[GOTOFF(ebx,PW_ONE)] + paddw xmm5,[GOTOFF(ebx,PW_ONE)] + paddw xmm3,[GOTOFF(ebx,PW_TWO)] + paddw xmm6,[GOTOFF(ebx,PW_TWO)] + + paddw xmm2,xmm1 + paddw xmm5,xmm4 + psrlw xmm2,2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) + paddw xmm3,xmm1 + paddw xmm6,xmm4 + psrlw xmm3,2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) + psrlw xmm6,2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) + + psllw xmm3,BYTE_BIT + psllw xmm6,BYTE_BIT + por xmm2,xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) + por xmm5,xmm6 ; xmm5=OutH=(16 17 18 ...
29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5 + + sub eax, byte SIZEOF_XMMWORD + add esi, byte 1*SIZEOF_XMMWORD ; inptr + add edi, byte 2*SIZEOF_XMMWORD ; outptr + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jpeg_h2v2_fancy_upsample_sse2 (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h2v2_fancy_upsample_sse2) + +EXTN(jpeg_h2v2_fancy_upsample_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx,eax ; edx = original ebp + mov eax, POINTER [compptr(edx)] + mov
eax, JDIMENSION [jcompinfo_downsampled_width(eax)] ; colctr + test eax,eax + jz near .return + + mov ecx, POINTER [cinfo(edx)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_XMMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample (copy of the last one) in each of the three rows + pop edx +.skip: + ; -- process the first column block + + movdqa xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] + movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] + movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + + pcmpeqb xmm7,xmm7 + psrldq xmm7,(SIZEOF_XMMWORD-2) + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2
3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6 + + pand xmm1,xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) + pand xmm2,xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) + + movdqa XMMWORD [wk(0)], xmm1 + movdqa XMMWORD [wk(1)], xmm2 + + poppic ebx + + add eax, byte SIZEOF_XMMWORD-1 + and eax, byte -SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop + alignx 16,7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb xmm1,xmm1 + pslldq xmm1,(SIZEOF_XMMWORD-2) + movdqa xmm2,xmm1 + + pand xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD] + pand xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) + movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) + + jmp near .upsample + alignx 16,7 + +.columnloop: + ; -- process the next column block + + movdqa xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] + movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3,xmm3 ; xmm3=(all 0's) + movdqa xmm4,xmm0 + punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm1 + punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm2 + punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0,[GOTOFF(ebx,PW_THREE)] + pmullw
xmm4,[GOTOFF(ebx,PW_THREE)] + + paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6 + + pslldq xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) + pslldq xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) + + movdqa XMMWORD [wk(2)], xmm1 + movdqa XMMWORD [wk(3)], xmm2 + +.upsample: + ; -- process the upper row + + movdqa xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD] + + movdqa xmm0,xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) + movdqa xmm4,xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) + psrldq xmm0,2 ; xmm0=( 1 2 3 4 5 6 7 --) + pslldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) + movdqa xmm5,xmm7 + movdqa xmm6,xmm3 + psrldq xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) + pslldq xmm6,2 ; xmm6=(-- 8 9 10 11 12 13 14) + + por xmm0,xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) + por xmm5,xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm7 + movdqa xmm2,xmm3 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm2,2 ; xmm2=( 9 10 11 12 13 14 15 --) + movdqa xmm4,xmm3 + psrldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(0)], xmm4 + + pmullw xmm7,[GOTOFF(ebx,PW_THREE)] + pmullw xmm3,[GOTOFF(ebx,PW_THREE)] + paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm5,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm0,[GOTOFF(ebx,PW_SEVEN)] + paddw xmm2,[GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1,xmm7 + paddw xmm5,xmm3 + psrlw xmm1,4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) + psrlw xmm5,4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) +
paddw xmm0,xmm7 + paddw xmm2,xmm3 + psrlw xmm0,4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) + psrlw xmm2,4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) + + psllw xmm0,BYTE_BIT + psllw xmm2,BYTE_BIT + por xmm1,xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) + por xmm5,xmm2 ; xmm5=Out0H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 + + ; -- process the lower row + + movdqa xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD] + movdqa xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa xmm7,xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) + movdqa xmm3,xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) + psrldq xmm7,2 ; xmm7=( 1 2 3 4 5 6 7 --) + pslldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) + movdqa xmm0,xmm6 + movdqa xmm2,xmm4 + psrldq xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) + pslldq xmm2,2 ; xmm2=(-- 8 9 10 11 12 13 14) + + por xmm7,xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) + por xmm0,xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) + + movdqa xmm1,xmm6 + movdqa xmm5,xmm4 + pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm5,2 ; xmm5=( 9 10 11 12 13 14 15 --) + movdqa xmm3,xmm4 + psrldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(1)], xmm3 + + pmullw xmm6,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm0,[GOTOFF(ebx,PW_EIGHT)] + paddw xmm7,[GOTOFF(ebx,PW_SEVEN)] + paddw xmm5,[GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1,xmm6 + paddw xmm0,xmm4 + psrlw xmm1,4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) + psrlw xmm0,4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) + paddw xmm7,xmm6 + paddw xmm5,xmm4 + psrlw xmm7,4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) + psrlw xmm5,4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) + + psllw xmm7,BYTE_BIT + psllw xmm5,BYTE_BIT + por xmm1,xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) + por xmm0,xmm5 ; xmm0=Out1H=(16 17 18 ...
29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0 + + poppic ebx + + sub eax, byte SIZEOF_XMMWORD + add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_XMMWORD ; inptr0 + add esi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_XMMWORD ; outptr0 + add edi, byte 2*SIZEOF_XMMWORD ; outptr1 + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax,eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%ifdef UPSAMPLE_H1V2_SUPPORTED + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 1:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above.
+; Out0 = (3*row[0] + row[-1] + 1) >> 2, Out1 = (3*row[0] + row[+1] + 2) >> 2. +; GLOBAL(void) +; jpeg_h1v2_fancy_upsample_sse2 (j_decompress_ptr cinfo, +; jpeg_component_info * compptr, +; JSAMPARRAY input_data, +; JSAMPARRAY * output_data_ptr); +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define input_data(b) (b)+16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr + +%define gotptr ebp-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_h1v2_fancy_upsample_sse2) + +EXTN(jpeg_h1v2_fancy_upsample_sse2): + push ebp + mov ebp,esp + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov eax, POINTER [compptr(ebp)] + mov eax, JDIMENSION [jcompinfo_downsampled_width(eax)] ; colctr + add eax, byte SIZEOF_XMMWORD-1 + and eax, byte -SIZEOF_XMMWORD + jz near .return + + mov ecx, POINTER [cinfo(ebp)] + mov ecx, INT [jdstruct_max_v_samp_factor(ecx)] ; rowctr + test ecx,ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16,7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + pxor xmm0,xmm0 ; xmm0=(all 0's) + alignx 16,7 + +.columnloop: + movdqa xmm1, XMMWORD [ebx] ; xmm1=row[ 0]( 0 1 2 ... 13 14 15) + movdqa xmm2, XMMWORD [ecx] ; xmm2=row[-1]( 0 1 2 ... 13 14 15) + movdqa xmm3, XMMWORD [esi] ; xmm3=row[+1]( 0 1 2 ...
13 14 15) + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + movdqa xmm4,xmm1 + punpcklbw xmm1,xmm0 ; xmm1=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4,xmm0 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5,xmm2 + punpcklbw xmm2,xmm0 ; xmm2=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5,xmm0 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6,xmm3 + punpcklbw xmm3,xmm0 ; xmm3=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6,xmm0 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm1,[GOTOFF(ebx,PW_THREE)] + pmullw xmm4,[GOTOFF(ebx,PW_THREE)] + paddw xmm2,[GOTOFF(ebx,PW_ONE)] + paddw xmm5,[GOTOFF(ebx,PW_ONE)] + paddw xmm3,[GOTOFF(ebx,PW_TWO)] + paddw xmm6,[GOTOFF(ebx,PW_TWO)] + + paddw xmm2,xmm1 + paddw xmm5,xmm4 + psrlw xmm2,2 ; xmm2=Out0L=( 0 1 2 3 4 5 6 7) + psrlw xmm5,2 ; xmm5=Out0H=( 8 9 10 11 12 13 14 15) + paddw xmm3,xmm1 + paddw xmm6,xmm4 + psrlw xmm3,2 ; xmm3=Out1L=( 0 1 2 3 4 5 6 7) + psrlw xmm6,2 ; xmm6=Out1H=( 8 9 10 11 12 13 14 15) + + packuswb xmm2,xmm5 ; xmm2=Out0=( 0 1 2 ... 13 14 15) + packuswb xmm3,xmm6 ; xmm3=Out1=( 0 1 2 ...
13 14 15) + + movdqa XMMWORD [edx], xmm2 + movdqa XMMWORD [edi], xmm3 + + poppic ebx + + add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_XMMWORD ; inptr0 + add esi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add edx, byte 1*SIZEOF_XMMWORD ; outptr0 + add edi, byte 1*SIZEOF_XMMWORD ; outptr1 + sub eax, byte SIZEOF_XMMWORD + jnz near .columnloop + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr (two output rows per pass) + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + poppic eax ; remove gotptr + pop ebp + ret + +%endif ; UPSAMPLE_H1V2_SUPPORTED +%endif ; JDSAMPLE_FANCY_SSE2_SUPPORTED + +%ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED + +%ifndef JDSAMPLE_FANCY_SSE2_SUPPORTED +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +%endif +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter.
+;
+; GLOBAL(void)
+; jpeg_h2v1_upsample_sse2 (j_decompress_ptr cinfo,
+;                          jpeg_component_info * compptr,
+;                          JSAMPARRAY input_data,
+;                          JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)		(b)+8		; j_decompress_ptr cinfo
+%define compptr(b)		(b)+12		; jpeg_component_info * compptr
+%define input_data(b)		(b)+16		; JSAMPARRAY input_data
+%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+
+	align	16
+	global	EXTN(jpeg_h2v1_upsample_sse2)
+
+EXTN(jpeg_h2v1_upsample_sse2):
+	push	ebp
+	mov	ebp,esp
+;	push	ebx		; unused
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	edx, POINTER [cinfo(ebp)]
+	mov	edx, JDIMENSION [jdstruct_output_width(edx)]
+	add	edx, byte (2*SIZEOF_XMMWORD)-1	; round output_width up to a
+	and	edx, byte -(2*SIZEOF_XMMWORD)	;  multiple of 2*SIZEOF_XMMWORD
+	jz	short .return			; zero width: nothing to do
+
+	mov	ecx, POINTER [cinfo(ebp)]
+	mov	ecx, INT [jdstruct_max_v_samp_factor(ecx)]	; rowctr
+	test	ecx,ecx
+	jz	short .return			; zero rows: nothing to do
+
+	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
+	mov	edi, POINTER [output_data_ptr(ebp)]
+	mov	edi, JSAMPARRAY [edi]			; output_data
+	alignx	16,7
+.rowloop:
+	push	edi				; save output_data
+	push	esi				; save input_data
+
+	mov	esi, JSAMPROW [esi]		; inptr
+	mov	edi, JSAMPROW [edi]		; outptr
+	mov	eax,edx				; colctr
+	alignx	16,7
+.columnloop:
+
+	movdqa	xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+
+	movdqa	xmm1,xmm0
+	punpcklbw xmm0,xmm0		; xmm0 = low 8 bytes, each duplicated
+	punpckhbw xmm1,xmm1		; xmm1 = high 8 bytes, each duplicated
+
+	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+	movdqa	XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
+
+	sub	eax, byte 2*SIZEOF_XMMWORD	; colctr -= bytes just written
+	jz	short .nextrow
+
+	movdqa	xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+	movdqa	xmm3,xmm2
+	punpcklbw xmm2,xmm2		; xmm2 = low 8 bytes, each duplicated
+	punpckhbw xmm3,xmm3		; xmm3 = high 8 bytes, each duplicated
+
+	movdqa	XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+	movdqa	XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
+
+	sub	eax, byte 2*SIZEOF_XMMWORD	; colctr -= bytes just written
+	jz	short .nextrow
+
+	add	esi, byte 2*SIZEOF_XMMWORD	; inptr
+	add	edi, byte 4*SIZEOF_XMMWORD	; outptr
+	jmp	short .columnloop
+	alignx	16,7
+
+.nextrow:
+	pop	esi				; restore input_data
+	pop	edi				; restore output_data
+
+	add	esi, byte SIZEOF_JSAMPROW	; input_data
+	add	edi, byte SIZEOF_JSAMPROW	; output_data
+	dec	ecx				; rowctr
+	jg	short .rowloop
+
+.return:
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+;	pop	ebx		; unused
+	pop	ebp
+	ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+; It's still a box filter: each input byte is written twice into each of
+; two consecutive output rows.
+; GLOBAL(void)
+; jpeg_h2v2_upsample_sse2 (j_decompress_ptr cinfo,
+;                          jpeg_component_info * compptr,
+;                          JSAMPARRAY input_data,
+;                          JSAMPARRAY * output_data_ptr);
+;
+
+%define cinfo(b)		(b)+8		; j_decompress_ptr cinfo
+%define compptr(b)		(b)+12		; jpeg_component_info * compptr
+%define input_data(b)		(b)+16		; JSAMPARRAY input_data
+%define output_data_ptr(b)	(b)+20		; JSAMPARRAY * output_data_ptr
+
+	align	16
+	global	EXTN(jpeg_h2v2_upsample_sse2)
+
+EXTN(jpeg_h2v2_upsample_sse2):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	mov	edx, POINTER [cinfo(ebp)]
+	mov	edx, JDIMENSION [jdstruct_output_width(edx)]
+	add	edx, byte (2*SIZEOF_XMMWORD)-1	; round output_width up to a
+	and	edx, byte -(2*SIZEOF_XMMWORD)	;  multiple of 2*SIZEOF_XMMWORD
+	jz	near .return			; zero width: nothing to do
+
+	mov	ecx, POINTER [cinfo(ebp)]
+	mov	ecx, INT [jdstruct_max_v_samp_factor(ecx)]	; rowctr
+	test	ecx,ecx
+	jz	near .return			; zero rows: nothing to do
+
+	mov	esi, JSAMPARRAY [input_data(ebp)]	; input_data
+	mov	edi, POINTER [output_data_ptr(ebp)]
+	mov	edi, JSAMPARRAY [edi]			; output_data
+	alignx	16,7
+.rowloop:
+	push	edi				; save output_data
+	push	esi				; save input_data
+
+	mov	esi, JSAMPROW [esi]			; inptr
+	mov	ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]	; outptr0
+	mov	edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]	; outptr1
+	mov	eax,edx					; colctr
+	alignx	16,7
+.columnloop:
+
+	movdqa	xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+
+	movdqa	xmm1,xmm0
+	punpcklbw xmm0,xmm0		; xmm0 = low 8 bytes, each duplicated
+	punpckhbw xmm1,xmm1		; xmm1 = high 8 bytes, each duplicated
+
+	movdqa	XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0
+	movdqa	XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1
+	movdqa	XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+	movdqa	XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
+
+	sub	eax, byte 2*SIZEOF_XMMWORD	; colctr -= bytes written per row
+	jz	short .nextrow
+
+	movdqa	xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+	movdqa	xmm3,xmm2
+	punpcklbw xmm2,xmm2		; xmm2 = low 8 bytes, each duplicated
+	punpckhbw xmm3,xmm3		; xmm3 = high 8 bytes, each duplicated
+
+	movdqa	XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2
+	movdqa	XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3
+	movdqa	XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+	movdqa	XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
+
+	sub	eax, byte 2*SIZEOF_XMMWORD	; colctr -= bytes written per row
+	jz	short .nextrow
+
+	add	esi, byte 2*SIZEOF_XMMWORD	; inptr
+	add	ebx, byte 4*SIZEOF_XMMWORD	; outptr0
+	add	edi, byte 4*SIZEOF_XMMWORD	; outptr1
+	jmp	short .columnloop
+	alignx	16,7
+
+.nextrow:
+	pop	esi				; restore input_data
+	pop	edi				; restore output_data
+
+	add	esi, byte 1*SIZEOF_JSAMPROW	; input_data
+	add	edi, byte 2*SIZEOF_JSAMPROW	; output_data
+	sub	ecx, byte 2			; rowctr
+	jg	short .rowloop
+
+.return:
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+%endif ; JDSAMPLE_SIMPLE_SSE2_SUPPORTED
diff --git a/jf3dnflt.asm b/jf3dnflt.asm
new file mode 100644
index 0000000..7117dd4
--- /dev/null
+++ b/jf3dnflt.asm
@@ -0,0 +1,327 @@
+;
+; jf3dnflt.asm - floating-point FDCT (3DNow!)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+%ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_CONST
+
+	alignz	16
+	global	EXTN(jconst_fdct_float_3dnow)
+
+EXTN(jconst_fdct_float_3dnow):
+
+PD_0_382	times 2 dd  0.382683432365089771728460
+PD_0_707	times 2 dd  0.707106781186547524400844
+PD_0_541	times 2 dd  0.541196100146196984399723
+PD_1_306	times 2 dd  1.306562964876376527856643
+
+	alignz	16
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Perform the forward DCT, in place, on one 8x8 block of FAST_FLOAT samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_float_3dnow (FAST_FLOAT * data)
+;
+
+%define data(b)		(b)+8		; FAST_FLOAT * data
+
+%define original_ebp	ebp+0
+%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
+%define WK_NUM		2
+
+	align	16
+	global	EXTN(jpeg_fdct_float_3dnow)
+
+EXTN(jpeg_fdct_float_3dnow):
+	push	ebp
+	mov	eax,esp				; eax = original ebp (unaligned frame base; args are at data(eax))
+	sub	esp, byte 4			; room for the saved frame base
+	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
+	mov	[esp],eax			; [original_ebp] = unaligned frame base
+	mov	ebp,esp				; ebp = aligned ebp
+	lea	esp, [wk(0)]			; reserve wk[WK_NUM] below aligned ebp
+	pushpic	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+;	push	esi		; unused
+;	push	edi		; unused
+
+	get_GOT	ebx		; get GOT address
+
+	; ---- Pass 1: process rows.
+
+	mov	edx, POINTER [data(eax)]	; (FAST_FLOAT *)
+	mov	ecx, DCTSIZE/2			; ctr: two rows per iteration
+	alignx	16,7
+.rowloop:
+
+	movq	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+
+	; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17)
+
+	movq      mm4,mm0		; transpose coefficients
+	punpckldq mm0,mm1		; mm0=(00 10)=data0
+	punpckhdq mm4,mm1		; mm4=(01 11)=data1
+	movq      mm5,mm2		; transpose coefficients
+	punpckldq mm2,mm3		; mm2=(06 16)=data6
+	punpckhdq mm5,mm3		; mm5=(07 17)=data7
+
+	movq	mm6,mm4
+	movq	mm7,mm0
+	pfsub	mm4,mm2			; mm4=data1-data6=tmp6
+	pfsub	mm0,mm5			; mm0=data0-data7=tmp7
+	pfadd	mm6,mm2			; mm6=data1+data6=tmp1
+	pfadd	mm7,mm5			; mm7=data0+data7=tmp0
+
+	movq	mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+
+	; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15)
+
+	movq	MMWORD [wk(0)], mm4	; wk(0)=tmp6
+	movq	MMWORD [wk(1)], mm0	; wk(1)=tmp7
+
+	movq      mm4,mm1		; transpose coefficients
+	punpckldq mm1,mm3		; mm1=(02 12)=data2
+	punpckhdq mm4,mm3		; mm4=(03 13)=data3
+	movq      mm0,mm2		; transpose coefficients
+	punpckldq mm2,mm5		; mm2=(04 14)=data4
+	punpckhdq mm0,mm5		; mm0=(05 15)=data5
+
+	movq	mm3,mm4
+	movq	mm5,mm1
+	pfadd	mm4,mm2			; mm4=data3+data4=tmp3
+	pfadd	mm1,mm0			; mm1=data2+data5=tmp2
+	pfsub	mm3,mm2			; mm3=data3-data4=tmp4
+	pfsub	mm5,mm0			; mm5=data2-data5=tmp5
+
+	; -- Even part
+
+	movq	mm2,mm7
+	movq	mm0,mm6
+	pfsub	mm7,mm4			; mm7=tmp13
+	pfsub	mm6,mm1			; mm6=tmp12
+	pfadd	mm2,mm4			; mm2=tmp10
+	pfadd	mm0,mm1			; mm0=tmp11
+
+	pfadd	mm6,mm7			; mm6=tmp12+tmp13
+	pfmul	mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+
+	movq	mm4,mm2
+	movq	mm1,mm7
+	pfsub	mm2,mm0			; mm2=data4
+	pfsub	mm7,mm6			; mm7=data6
+	pfadd	mm4,mm0			; mm4=data0
+	pfadd	mm1,mm6			; mm1=data2
+
+	movq	MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2
+	movq	MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7
+	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+	movq	MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1
+
+	; -- Odd part
+
+	movq	mm0, MMWORD [wk(0)]	; mm0=tmp6
+	movq	mm6, MMWORD [wk(1)]	; mm6=tmp7
+
+	pfadd	mm3,mm5			; mm3=tmp10
+	pfadd	mm5,mm0			; mm5=tmp11
+	pfadd	mm0,mm6			; mm0=tmp12, mm6=tmp7
+
+	pfmul	mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+
+	movq	mm2,mm3			; mm2=tmp10
+	pfsub	mm3,mm0			; mm3=tmp10-tmp12
+	pfmul	mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+	pfmul	mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+	pfmul	mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+	pfadd	mm2,mm3			; mm2=z2
+	pfadd	mm0,mm3			; mm0=z4
+
+	movq	mm7,mm6
+	pfsub	mm6,mm5			; mm6=z13
+	pfadd	mm7,mm5			; mm7=z11
+
+	movq	mm4,mm6
+	movq	mm1,mm7
+	pfsub	mm6,mm2			; mm6=data3
+	pfsub	mm7,mm0			; mm7=data7
+	pfadd	mm4,mm2			; mm4=data5
+	pfadd	mm1,mm0			; mm1=data1
+
+	movq	MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6
+	movq	MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7
+	movq	MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4
+	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+	add	edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT	; advance pointer by two rows
+	dec	ecx
+	jnz	near .rowloop
+
+	; ---- Pass 2: process columns.
+
+	mov	edx, POINTER [data(eax)]	; (FAST_FLOAT *)
+	mov	ecx, DCTSIZE/2			; ctr: two columns per iteration
+	alignx	16,7
+.columnloop:
+
+	movq	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
+
+	; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71)
+
+	movq      mm4,mm0		; transpose coefficients
+	punpckldq mm0,mm1		; mm0=(00 01)=data0
+	punpckhdq mm4,mm1		; mm4=(10 11)=data1
+	movq      mm5,mm2		; transpose coefficients
+	punpckldq mm2,mm3		; mm2=(60 61)=data6
+	punpckhdq mm5,mm3		; mm5=(70 71)=data7
+
+	movq	mm6,mm4
+	movq	mm7,mm0
+	pfsub	mm4,mm2			; mm4=data1-data6=tmp6
+	pfsub	mm0,mm5			; mm0=data0-data7=tmp7
+	pfadd	mm6,mm2			; mm6=data1+data6=tmp1
+	pfadd	mm7,mm5			; mm7=data0+data7=tmp0
+
+	movq	mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
+	movq	mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
+
+	; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51)
+
+	movq	MMWORD [wk(0)], mm4	; wk(0)=tmp6
+	movq	MMWORD [wk(1)], mm0	; wk(1)=tmp7
+
+	movq      mm4,mm1		; transpose coefficients
+	punpckldq mm1,mm3		; mm1=(20 21)=data2
+	punpckhdq mm4,mm3		; mm4=(30 31)=data3
+	movq      mm0,mm2		; transpose coefficients
+	punpckldq mm2,mm5		; mm2=(40 41)=data4
+	punpckhdq mm0,mm5		; mm0=(50 51)=data5
+
+	movq	mm3,mm4
+	movq	mm5,mm1
+	pfadd	mm4,mm2			; mm4=data3+data4=tmp3
+	pfadd	mm1,mm0			; mm1=data2+data5=tmp2
+	pfsub	mm3,mm2			; mm3=data3-data4=tmp4
+	pfsub	mm5,mm0			; mm5=data2-data5=tmp5
+
+	; -- Even part
+
+	movq	mm2,mm7
+	movq	mm0,mm6
+	pfsub	mm7,mm4			; mm7=tmp13
+	pfsub	mm6,mm1			; mm6=tmp12
+	pfadd	mm2,mm4			; mm2=tmp10
+	pfadd	mm0,mm1			; mm0=tmp11
+
+	pfadd	mm6,mm7			; mm6=tmp12+tmp13
+	pfmul	mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+
+	movq	mm4,mm2
+	movq	mm1,mm7
+	pfsub	mm2,mm0			; mm2=data4
+	pfsub	mm7,mm6			; mm7=data6
+	pfadd	mm4,mm0			; mm4=data0
+	pfadd	mm1,mm6			; mm1=data2
+
+	movq	MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2
+	movq	MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7
+	movq	MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+	movq	MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+	; -- Odd part
+
+	movq	mm0, MMWORD [wk(0)]	; mm0=tmp6
+	movq	mm6, MMWORD [wk(1)]	; mm6=tmp7
+
+	pfadd	mm3,mm5			; mm3=tmp10
+	pfadd	mm5,mm0			; mm5=tmp11
+	pfadd	mm0,mm6			; mm0=tmp12, mm6=tmp7
+
+	pfmul	mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+
+	movq	mm2,mm3			; mm2=tmp10
+	pfsub	mm3,mm0			; mm3=tmp10-tmp12
+	pfmul	mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+	pfmul	mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+	pfmul	mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+	pfadd	mm2,mm3			; mm2=z2
+	pfadd	mm0,mm3			; mm0=z4
+
+	movq	mm7,mm6
+	pfsub	mm6,mm5			; mm6=z13
+	pfadd	mm7,mm5			; mm7=z11
+
+	movq	mm4,mm6
+	movq	mm1,mm7
+	pfsub	mm6,mm2			; mm6=data3
+	pfsub	mm7,mm0			; mm7=data7
+	pfadd	mm4,mm2			; mm4=data5
+	pfadd	mm1,mm0			; mm1=data1
+
+	movq	MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6
+	movq	MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7
+	movq	MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4
+	movq	MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+	add	edx, byte 2*SIZEOF_FAST_FLOAT	; advance pointer by two columns
+	dec	ecx
+	jnz	near .columnloop
+
+	femms		; empty MMX/3DNow! state
+
+;	pop	edi		; unused
+;	pop	esi		; unused
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	poppic	ebx
+	mov	esp,ebp		; esp <- aligned ebp
+	pop	esp		; esp <- original ebp
+	pop	ebp
+	ret
+
+%endif ; JFDCT_FLT_3DNOW_MMX_SUPPORTED
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jfdctflt.asm b/jfdctflt.asm
new file mode 100644
index 0000000..178e1f9
--- /dev/null
+++ b/jfdctflt.asm
@@ -0,0 +1,288 @@
+;
+; jfdctflt.asm - floating-point FDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_FLOAT_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_CONST
+
+%define	ROTATOR_TYPE	FP32	; float
+
+	alignz	16
+	global	EXTN(jconst_fdct_float)
+
+EXTN(jconst_fdct_float):
+
+F_0_382	dd	0.382683432365089771728460	; cos(PI*3/8)
+F_0_707	dd	0.707106781186547524400844	; cos(PI*1/4)
+F_0_541	dd	0.541196100146196984399723	; cos(PI*1/8)-cos(PI*3/8)
+F_1_306	dd	1.306562964876376527856643	; cos(PI*1/8)+cos(PI*3/8)
+
+	alignz	16
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Perform the forward DCT, in place, on one 8x8 block of FAST_FLOAT samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_float (FAST_FLOAT * data)
+;
+
+%define data(b)		(b)+8		; FAST_FLOAT * data
+
+	align	16
+	global	EXTN(jpeg_fdct_float)
+
+EXTN(jpeg_fdct_float):
+	push	ebp
+	mov	ebp,esp
+	pushpic	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+;	push	esi		; unused
+;	push	edi		; unused
+
+	get_GOT	ebx		; get GOT address
+
+	; ---- Pass 1: process rows.
+
+	mov	edx, POINTER [data(ebp)]	; (FAST_FLOAT *)
+	mov	ecx, DCTSIZE			; ctr: one row per iteration
+	alignx	16,7
+.rowloop:
+	fld	FAST_FLOAT [ROW(1,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [ROW(6,edx,SIZEOF_FAST_FLOAT)]	; data1+data6
+	fld	FAST_FLOAT [ROW(0,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [ROW(7,edx,SIZEOF_FAST_FLOAT)]	; data0+data7
+	fld	FAST_FLOAT [ROW(3,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [ROW(4,edx,SIZEOF_FAST_FLOAT)]	; data3+data4
+	fld	FAST_FLOAT [ROW(2,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [ROW(5,edx,SIZEOF_FAST_FLOAT)]	; data2+data5
+
+	; -- Even part
+
+	fld	st2		; st2 = st2 + st1, st1 = st2 - st1
+	fsub	st0,st2
+	fxch	st0,st2
+	faddp	st3,st0
+	fld	st3		; st3 = st3 + st0, st0 = st3 - st0
+	fsub	st0,st1
+	fxch	st0,st1
+	faddp	st4,st0
+
+	fadd	st0,st1
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+
+	fld	st2		; st3 = st2 + st3, st2 = st2 - st3
+	fsub	st0,st4
+	fxch	st0,st3
+	faddp	st4,st0
+	fld	st1		; st0 = st1 + st0, st1 = st1 - st0
+	fsub	st0,st1
+	fxch	st0,st2
+	faddp	st1,st0
+
+	fld	FAST_FLOAT [ROW(0,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [ROW(7,edx,SIZEOF_FAST_FLOAT)]	; data0-data7
+	fxch	st0,st4
+	fld	FAST_FLOAT [ROW(3,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [ROW(4,edx,SIZEOF_FAST_FLOAT)]	; data3-data4
+	fxch	st0,st4
+	fld	FAST_FLOAT [ROW(1,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [ROW(6,edx,SIZEOF_FAST_FLOAT)]	; data1-data6
+	fxch	st0,st4
+	fld	FAST_FLOAT [ROW(2,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [ROW(5,edx,SIZEOF_FAST_FLOAT)]	; data2-data5
+	fxch	st0,st4
+
+	fstp	FAST_FLOAT [ROW(2,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [ROW(6,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [ROW(4,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [ROW(0,edx,SIZEOF_FAST_FLOAT)]
+
+	; -- Odd part
+
+	fadd	st2,st0
+	fadd	st0,st1
+	fxch	st0,st3
+	fadd	st1,st0
+	fxch	st0,st3
+
+	fld	st2
+	fxch	st0,st1
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+	fxch	st0,st1
+	fsub	st0,st2
+	fxch	st0,st3
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_541)]
+	fxch	st0,st3
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_382)]
+	fxch	st0,st2
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_1_306)]
+	fxch	st0,st2
+	fadd	st3,st0
+	faddp	st2,st0
+
+	fld	st3		; st3 = st3 + st0, st0 = st3 - st0
+	fsub	st0,st1
+	fxch	st0,st1
+	faddp	st4,st0
+
+	fld	st2		; st0 = st0 + st2, st2 = st0 - st2
+	fsubr	st0,st1
+	fxch	st0,st3
+	faddp	st1,st0
+	fld	st1		; st3 = st3 + st1, st1 = st3 - st1
+	fsubr	st0,st4
+	fxch	st0,st2
+	faddp	st4,st0
+
+	fstp	FAST_FLOAT [ROW(5,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [ROW(7,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [ROW(3,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [ROW(1,edx,SIZEOF_FAST_FLOAT)]
+
+	add	edx, byte DCTSIZE*SIZEOF_FAST_FLOAT	; advance pointer to next row
+	dec	ecx
+	jnz	near .rowloop
+
+	; ---- Pass 2: process columns.
+
+	mov	edx, POINTER [data(ebp)]	; (FAST_FLOAT *)
+	mov	ecx, DCTSIZE			; ctr: one column per iteration
+	alignx	16,7
+.columnloop:
+	fld	FAST_FLOAT [COL(1,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [COL(6,edx,SIZEOF_FAST_FLOAT)]	; data1+data6
+	fld	FAST_FLOAT [COL(0,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [COL(7,edx,SIZEOF_FAST_FLOAT)]	; data0+data7
+	fld	FAST_FLOAT [COL(3,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [COL(4,edx,SIZEOF_FAST_FLOAT)]	; data3+data4
+	fld	FAST_FLOAT [COL(2,edx,SIZEOF_FAST_FLOAT)]
+	fadd	FAST_FLOAT [COL(5,edx,SIZEOF_FAST_FLOAT)]	; data2+data5
+
+	; -- Even part
+
+	fld	st2		; st2 = st2 + st1, st1 = st2 - st1
+	fsub	st0,st2
+	fxch	st0,st2
+	faddp	st3,st0
+	fld	st3		; st3 = st3 + st0, st0 = st3 - st0
+	fsub	st0,st1
+	fxch	st0,st1
+	faddp	st4,st0
+
+	fadd	st0,st1
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+
+	fld	st2		; st3 = st2 + st3, st2 = st2 - st3
+	fsub	st0,st4
+	fxch	st0,st3
+	faddp	st4,st0
+	fld	st1		; st0 = st1 + st0, st1 = st1 - st0
+	fsub	st0,st1
+	fxch	st0,st2
+	faddp	st1,st0
+
+	fld	FAST_FLOAT [COL(0,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [COL(7,edx,SIZEOF_FAST_FLOAT)]	; data0-data7
+	fxch	st0,st4
+	fld	FAST_FLOAT [COL(3,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [COL(4,edx,SIZEOF_FAST_FLOAT)]	; data3-data4
+	fxch	st0,st4
+	fld	FAST_FLOAT [COL(1,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [COL(6,edx,SIZEOF_FAST_FLOAT)]	; data1-data6
+	fxch	st0,st4
+	fld	FAST_FLOAT [COL(2,edx,SIZEOF_FAST_FLOAT)]
+	fsub	FAST_FLOAT [COL(5,edx,SIZEOF_FAST_FLOAT)]	; data2-data5
+	fxch	st0,st4
+
+	fstp	FAST_FLOAT [COL(2,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [COL(6,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [COL(4,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [COL(0,edx,SIZEOF_FAST_FLOAT)]
+
+	; -- Odd part
+
+	fadd	st2,st0
+	fadd	st0,st1
+	fxch	st0,st3
+	fadd	st1,st0
+	fxch	st0,st3
+
+	fld	st2
+	fxch	st0,st1
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_707)]
+	fxch	st0,st1
+	fsub	st0,st2
+	fxch	st0,st3
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_541)]
+	fxch	st0,st3
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_0_382)]
+	fxch	st0,st2
+	fmul	ROTATOR_TYPE [GOTOFF(ebx,F_1_306)]
+	fxch	st0,st2
+	fadd	st3,st0
+	faddp	st2,st0
+
+	fld	st3		; st3 = st3 + st0, st0 = st3 - st0
+	fsub	st0,st1
+	fxch	st0,st1
+	faddp	st4,st0
+
+	fld	st2		; st0 = st0 + st2, st2 = st0 - st2
+	fsubr	st0,st1
+	fxch	st0,st3
+	faddp	st1,st0
+	fld	st1		; st3 = st3 + st1, st1 = st3 - st1
+	fsubr	st0,st4
+	fxch	st0,st2
+	faddp	st4,st0
+
+	fstp	FAST_FLOAT [COL(5,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [COL(7,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [COL(3,edx,SIZEOF_FAST_FLOAT)]
+	fstp	FAST_FLOAT [COL(1,edx,SIZEOF_FAST_FLOAT)]
+
+	add	edx, byte SIZEOF_FAST_FLOAT	; advance pointer to next column
+	dec	ecx
+	jnz	near .columnloop
+
+;	pop	edi		; unused
+;	pop	esi		; unused
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	poppic	ebx
+	pop	ebp
+	ret
+
+%endif ; DCT_FLOAT_SUPPORTED
diff --git a/jfdctfst.asm b/jfdctfst.asm
new file mode 100644
index 0000000..c73c920
--- /dev/null
+++ b/jfdctfst.asm
@@ -0,0 +1,303 @@
+;
+; jfdctfst.asm - fast integer FDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctfst.c; see the jfdctfst.c for
+; more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; We can gain a little more speed, with a further compromise in accuracy,
+; by omitting the addition in a descaling shift.  This yields an
+; incorrectly rounded result half the time...
+;
+%macro	descale 2
+%ifdef USE_ACCURATE_ROUNDING
+%if (%2)<=7
+	add	%1, byte (1<<((%2)-1))	; add reg32,imm8
+%else
+	add	%1, (1<<((%2)-1))	; add reg32,imm32
+%endif
+%endif
+	sar	%1,%2
+%endmacro
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS	8
+
+%if CONST_BITS == 8
+F_0_382	equ	 98		; FIX(0.382683433)
+F_0_541	equ	139		; FIX(0.541196100)
+F_0_707	equ	181		; FIX(0.707106781)
+F_1_306	equ	334		; FIX(1.306562965)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_382	equ	DESCALE( 410903207,30-CONST_BITS)	; FIX(0.382683433)
+F_0_541	equ	DESCALE( 581104887,30-CONST_BITS)	; FIX(0.541196100)
+F_0_707	equ	DESCALE( 759250124,30-CONST_BITS)	; FIX(0.707106781)
+F_1_306	equ	DESCALE(1402911301,30-CONST_BITS)	; FIX(1.306562965)
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Perform the forward DCT, in place, on one 8x8 block of DCTELEM samples.
+;
+; GLOBAL(void)
+; jpeg_fdct_ifast (DCTELEM * data)
+;
+
+%define data(b)		(b)+8		; DCTELEM * data
+
+	align	16
+	global	EXTN(jpeg_fdct_ifast)
+
+EXTN(jpeg_fdct_ifast):
+	push	ebp
+	mov	ebp,esp
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	; ---- Pass 1: process rows.
+
+	mov	ecx, DCTSIZE			; ctr: one row per iteration
+	mov	edx, POINTER [data(ebp)]	; (DCTELEM *)
+	alignx	16,7
+.rowloop:
+	push	ecx			; ctr
+	push	edx			; dataptr
+
+	movsx	eax, DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)]
+	movsx	edi, DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)]
+	lea	esi,[eax+edi]		; esi=tmp0
+	sub	eax,edi			; eax=tmp7
+	push	eax			; save tmp7
+
+	movsx	ebx, DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)]
+	movsx	ecx, DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)]
+	lea	edi,[ebx+ecx]		; edi=tmp1
+	sub	ebx,ecx			; ebx=tmp6
+	push	ebx			; save tmp6
+
+	movsx	eax, DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)]
+	movsx	ecx, DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)]
+	lea	ebx,[eax+ecx]		; ebx=tmp2
+	sub	eax,ecx			; eax=tmp5
+	push	eax			; save tmp5
+
+	movsx	ecx, DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)]
+	movsx	eax, DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)]
+	lea	edx,[ecx+eax]		; edx=tmp3
+	sub	ecx,eax			; ecx=tmp4
+	push	ecx			; save tmp4
+
+	; -- Even part
+
+	lea	eax,[esi+edx]		; eax=tmp10
+	lea	ecx,[edi+ebx]		; ecx=tmp11
+	sub	esi,edx			; esi=tmp13
+	sub	edi,ebx			; edi=tmp12
+
+	mov	edx, POINTER [esp+16]	; dataptr
+
+	add	edi,esi			; edi=tmp12+tmp13
+	imul	edi,(F_0_707)		; edi=z1
+	descale	edi,CONST_BITS
+
+	lea	ebx,[eax+ecx]		; ebx=data0
+	sub	eax,ecx			; eax=data4
+	mov	DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)], bx
+	mov	DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)], ax
+
+	lea	ecx,[esi+edi]		; ecx=data2
+	sub	esi,edi			; esi=data6
+	mov	DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)], cx
+	mov	DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)], si
+
+	; -- Odd part
+
+	pop	eax			; eax=tmp4
+	pop	edx			; edx=tmp5
+	pop	ebx			; ebx=tmp6
+	pop	edi			; edi=tmp7
+
+	add	eax,edx			; eax=tmp10
+	add	edx,ebx			; edx=tmp11
+	add	ebx,edi			; ebx=tmp12, edi=tmp7
+
+	imul	edx,(F_0_707)		; edx=z3
+	descale	edx,CONST_BITS
+	lea	esi,[edi+edx]		; esi=z11
+	sub	edi,edx			; edi=z13
+
+	mov	ecx,eax			; ecx=tmp10
+	sub	eax,ebx			; eax=tmp10-tmp12
+	imul	eax,(F_0_382)		; eax=z5
+	imul	ecx,(F_0_541)		; ecx=MULTIPLY(tmp10,FIX_0_541196100)
+	imul	ebx,(F_1_306)		; ebx=MULTIPLY(tmp12,FIX_1_306562965)
+	descale	eax,CONST_BITS
+	descale	ecx,CONST_BITS
+	descale	ebx,CONST_BITS
+	add	ecx,eax			; ecx=z2
+	add	ebx,eax			; ebx=z4
+
+	pop	edx			; dataptr
+
+	lea	eax,[edi+ecx]		; eax=data5
+	sub	edi,ecx			; edi=data3
+	mov	DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)], ax
+	mov	DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)], di
+
+	lea	ecx,[esi+ebx]		; ecx=data1
+	sub	esi,ebx			; esi=data7
+	mov	DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)], cx
+	mov	DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)], si
+
+	pop	ecx			; ctr
+
+	add	edx, byte DCTSIZE*SIZEOF_DCTELEM	; advance pointer to next row
+	dec	ecx
+	jnz	near .rowloop
+
+	; ---- Pass 2: process columns.
+
+	mov	ecx, DCTSIZE			; ctr: one column per iteration
+	mov	edx, POINTER [data(ebp)]	; (DCTELEM *)
+	alignx	16,7
+.columnloop:
+	push	ecx			; ctr
+	push	edx			; dataptr
+
+	movsx	eax, DCTELEM [COL(0,edx,SIZEOF_DCTELEM)]
+	movsx	edi, DCTELEM [COL(7,edx,SIZEOF_DCTELEM)]
+	lea	esi,[eax+edi]		; esi=tmp0
+	sub	eax,edi			; eax=tmp7
+	push	eax			; save tmp7
+
+	movsx	ebx, DCTELEM [COL(1,edx,SIZEOF_DCTELEM)]
+	movsx	ecx, DCTELEM [COL(6,edx,SIZEOF_DCTELEM)]
+	lea	edi,[ebx+ecx]		; edi=tmp1
+	sub	ebx,ecx			; ebx=tmp6
+	push	ebx			; save tmp6
+
+	movsx	eax, DCTELEM [COL(2,edx,SIZEOF_DCTELEM)]
+	movsx	ecx, DCTELEM [COL(5,edx,SIZEOF_DCTELEM)]
+	lea	ebx,[eax+ecx]		; ebx=tmp2
+	sub	eax,ecx			; eax=tmp5
+	push	eax			; save tmp5
+
+	movsx	ecx, DCTELEM [COL(3,edx,SIZEOF_DCTELEM)]
+	movsx	eax, DCTELEM [COL(4,edx,SIZEOF_DCTELEM)]
+	lea	edx,[ecx+eax]		; edx=tmp3
+	sub	ecx,eax			; ecx=tmp4
+	push	ecx			; save tmp4
+
+	; -- Even part
+
+	lea	eax,[esi+edx]		; eax=tmp10
+	lea	ecx,[edi+ebx]		; ecx=tmp11
+	sub	esi,edx			; esi=tmp13
+	sub	edi,ebx			; edi=tmp12
+
+	mov	edx, POINTER [esp+16]	; dataptr
+
+	add	edi,esi			; edi=tmp12+tmp13
+	imul	edi,(F_0_707)		; edi=z1
+	descale	edi,CONST_BITS
+
+	lea	ebx,[eax+ecx]		; ebx=data0
+	sub	eax,ecx			; eax=data4
+	mov	DCTELEM [COL(0,edx,SIZEOF_DCTELEM)], bx
+	mov	DCTELEM [COL(4,edx,SIZEOF_DCTELEM)], ax
+
+	lea	ecx,[esi+edi]		; ecx=data2
+	sub	esi,edi			; esi=data6
+	mov	DCTELEM [COL(2,edx,SIZEOF_DCTELEM)], cx
+	mov	DCTELEM [COL(6,edx,SIZEOF_DCTELEM)], si
+
+	; -- Odd part
+
+	pop	eax			; eax=tmp4
+	pop	edx			; edx=tmp5
+	pop	ebx			; ebx=tmp6
+	pop	edi			; edi=tmp7
+
+	add	eax,edx			; eax=tmp10
+	add	edx,ebx			; edx=tmp11
+	add	ebx,edi			; ebx=tmp12, edi=tmp7
+
+	imul	edx,(F_0_707)		; edx=z3
+	descale	edx,CONST_BITS
+	lea	esi,[edi+edx]		; esi=z11
+	sub	edi,edx			; edi=z13
+
+	mov	ecx,eax			; ecx=tmp10
+	sub	eax,ebx			; eax=tmp10-tmp12
+	imul	eax,(F_0_382)		; eax=z5
+	imul	ecx,(F_0_541)		; ecx=MULTIPLY(tmp10,FIX_0_541196100)
+	imul	ebx,(F_1_306)		; ebx=MULTIPLY(tmp12,FIX_1_306562965)
+	descale	eax,CONST_BITS
+	descale	ecx,CONST_BITS
+	descale	ebx,CONST_BITS
+	add	ecx,eax			; ecx=z2
+	add	ebx,eax			; ebx=z4
+
+	pop	edx			; dataptr
+
+	lea	eax,[edi+ecx]		; eax=data5
+	sub	edi,ecx			; edi=data3
+	mov	DCTELEM [COL(5,edx,SIZEOF_DCTELEM)], ax
+	mov	DCTELEM [COL(3,edx,SIZEOF_DCTELEM)], di
+
+	lea	ecx,[esi+ebx]		; ecx=data1
+	sub	esi,ebx			; esi=data7
+	mov	DCTELEM [COL(1,edx,SIZEOF_DCTELEM)], cx
+	mov	DCTELEM [COL(7,edx,SIZEOF_DCTELEM)], si
+
+	pop	ecx			; ctr
+
+	add	edx, byte SIZEOF_DCTELEM	; advance pointer to next column
+	dec	ecx
+	jnz	near .columnloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	pop	ebp
+	ret
+
+%endif ; DCT_IFAST_SUPPORTED
diff --git a/jfdctint.asm b/jfdctint.asm
new file mode 100644
index 0000000..0f29725
--- /dev/null
+++ b/jfdctint.asm
@@ -0,0 +1,342 @@
+;
+; jfdctint.asm - accurate integer FDCT (non-SIMD)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; Last Modified : October 17, 2004 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_ISLOW_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +; Descale and correctly round a DWORD value that's scaled by N bits. +; +%macro descale 2 +%if (%2)<=7 + add %1, byte (1<<((%2)-1)) ; add reg32,imm8 +%else + add %1, (1<<((%2)-1)) ; add reg32,imm32 +%endif + sar %1,%2 +%endmacro + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jpeg_fdct_islow (DCTELEM * data) +; + +%define data(b) (b)+8 ; DCTELEM * data + + align 16 + global EXTN(jpeg_fdct_islow) + +EXTN(jpeg_fdct_islow): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + ; ---- Pass 1: process rows. 
+ + mov edx, POINTER [data(ebp)] ; (DCTELEM *) + mov ecx, DCTSIZE + alignx 16,7 +.rowloop: + movsx eax, DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)] + movsx edi, DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)] + lea esi,[eax+edi] ; esi=tmp0 + sub eax,edi ; eax=tmp7 + push ecx ; ctr + push eax + + movsx ebx, DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)] + movsx ecx, DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)] + lea edi,[ebx+ecx] ; edi=tmp1 + sub ebx,ecx ; ebx=tmp6 + push ebx + + movsx eax, DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)] + movsx ecx, DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)] + lea ebx,[eax+ecx] ; ebx=tmp2 + sub eax,ecx ; eax=tmp5 + push edx ; dataptr + push eax + + movsx ecx, DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)] + movsx eax, DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)] + lea edx,[ecx+eax] ; edx=tmp3 + sub ecx,eax ; ecx=tmp4 + push ecx + + ; -- Even part + + lea eax,[esi+edx] ; eax=tmp10 + lea ecx,[edi+ebx] ; ecx=tmp11 + sub esi,edx ; esi=tmp13 + sub edi,ebx ; edi=tmp12 + + lea ebx,[eax+ecx] ; ebx=data0 + sub eax,ecx ; eax=data4 + mov edx, POINTER [esp+8] ; dataptr + sal ebx, PASS1_BITS + sal eax, PASS1_BITS + mov DCTELEM [ROW(0,edx,SIZEOF_DCTELEM)], bx + mov DCTELEM [ROW(4,edx,SIZEOF_DCTELEM)], ax + + lea ecx,[edi+esi] + imul ecx,(F_0_541) ; ecx=z1 + imul esi,(F_0_765) ; esi=MULTIPLY(tmp13,FIX_0_765366865) + imul edi,(-F_1_847) ; edi=MULTIPLY(tmp12,-FIX_1_847759065) + add esi,ecx ; esi=data2 + add edi,ecx ; edi=data6 + descale esi,(CONST_BITS-PASS1_BITS) + descale edi,(CONST_BITS-PASS1_BITS) + mov DCTELEM [ROW(2,edx,SIZEOF_DCTELEM)], si + mov DCTELEM [ROW(6,edx,SIZEOF_DCTELEM)], di + + ; -- Odd part + + mov eax, INT32 [esp] ; eax=tmp4 + mov ebx, INT32 [esp+4] ; ebx=tmp5 + mov ecx, INT32 [esp+12] ; ecx=tmp6 + mov esi, INT32 [esp+16] ; esi=tmp7 + + lea edx,[eax+ecx] ; edx=z3 + lea edi,[ebx+esi] ; edi=z4 + add eax,esi ; eax=z1 + add ebx,ecx ; ebx=z2 + + lea esi,[edx+edi] + imul esi,(F_1_175) ; esi=z5 + + imul edx,(-F_1_961) ; edx=z3(=MULTIPLY(z3,-FIX_1_961570560)) + imul edi,(-F_0_390) ; 
edi=z4(=MULTIPLY(z4,-FIX_0_390180644)) + imul eax,(-F_0_899) ; eax=z1(=MULTIPLY(z1,-FIX_0_899976223)) + imul ebx,(-F_2_562) ; ebx=z2(=MULTIPLY(z2,-FIX_2_562915447)) + + add edx,esi ; edx=z3(=z3+z5) + add edi,esi ; edi=z4(=z4+z5) + + lea ecx,[eax+edx] ; ecx=z1+z3 + lea esi,[ebx+edi] ; esi=z2+z4 + add eax,edi ; eax=z1+z4 + add ebx,edx ; ebx=z2+z3 + + pop edx ; edx=tmp4 + pop edi ; edi=tmp5 + imul edx,(F_0_298) ; edx=tmp4(=MULTIPLY(tmp4,FIX_0_298631336)) + imul edi,(F_2_053) ; edi=tmp5(=MULTIPLY(tmp5,FIX_2_053119869)) + add ecx,edx ; ecx=data7(=tmp4+z1+z3) + add esi,edi ; esi=data5(=tmp5+z2+z4) + pop edx ; edx=dataptr (start of current row) + descale ecx,(CONST_BITS-PASS1_BITS) + descale esi,(CONST_BITS-PASS1_BITS) + mov DCTELEM [ROW(7,edx,SIZEOF_DCTELEM)], cx + mov DCTELEM [ROW(5,edx,SIZEOF_DCTELEM)], si + + pop edi ; edi=tmp6 + pop ecx ; ecx=tmp7 + imul edi,(F_3_072) ; edi=tmp6(=MULTIPLY(tmp6,FIX_3_072711026)) + imul ecx,(F_1_501) ; ecx=tmp7(=MULTIPLY(tmp7,FIX_1_501321110)) + add ebx,edi ; ebx=data3(=tmp6+z2+z3) + add eax,ecx ; eax=data1(=tmp7+z1+z4) + pop ecx ; ecx=ctr (rows remaining) + descale ebx,(CONST_BITS-PASS1_BITS) + descale eax,(CONST_BITS-PASS1_BITS) + mov DCTELEM [ROW(3,edx,SIZEOF_DCTELEM)], bx + mov DCTELEM [ROW(1,edx,SIZEOF_DCTELEM)], ax + + add edx, byte DCTSIZE*SIZEOF_DCTELEM ; advance pointer to next row + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns.
+ + mov edx, POINTER [data(ebp)] ; (DCTELEM *) + mov ecx, DCTSIZE + alignx 16,7 +.columnloop: + movsx eax, DCTELEM [COL(0,edx,SIZEOF_DCTELEM)] + movsx edi, DCTELEM [COL(7,edx,SIZEOF_DCTELEM)] + lea esi,[eax+edi] ; esi=tmp0 + sub eax,edi ; eax=tmp7 + push ecx ; ctr + push eax + + movsx ebx, DCTELEM [COL(1,edx,SIZEOF_DCTELEM)] + movsx ecx, DCTELEM [COL(6,edx,SIZEOF_DCTELEM)] + lea edi,[ebx+ecx] ; edi=tmp1 + sub ebx,ecx ; ebx=tmp6 + push ebx + + movsx eax, DCTELEM [COL(2,edx,SIZEOF_DCTELEM)] + movsx ecx, DCTELEM [COL(5,edx,SIZEOF_DCTELEM)] + lea ebx,[eax+ecx] ; ebx=tmp2 + sub eax,ecx ; eax=tmp5 + push edx ; dataptr + push eax + + movsx ecx, DCTELEM [COL(3,edx,SIZEOF_DCTELEM)] + movsx eax, DCTELEM [COL(4,edx,SIZEOF_DCTELEM)] + lea edx,[ecx+eax] ; edx=tmp3 + sub ecx,eax ; ecx=tmp4 + push ecx + + ; -- Even part + + lea eax,[esi+edx] ; eax=tmp10 + lea ecx,[edi+ebx] ; ecx=tmp11 + sub esi,edx ; esi=tmp13 + sub edi,ebx ; edi=tmp12 + + lea ebx,[eax+ecx] ; ebx=data0 + sub eax,ecx ; eax=data4 + mov edx, POINTER [esp+8] ; dataptr + descale ebx, PASS1_BITS + descale eax, PASS1_BITS + mov DCTELEM [COL(0,edx,SIZEOF_DCTELEM)], bx + mov DCTELEM [COL(4,edx,SIZEOF_DCTELEM)], ax + + lea ecx,[edi+esi] + imul ecx,(F_0_541) ; ecx=z1 + imul esi,(F_0_765) ; esi=MULTIPLY(tmp13,FIX_0_765366865) + imul edi,(-F_1_847) ; edi=MULTIPLY(tmp12,-FIX_1_847759065) + add esi,ecx ; esi=data2 + add edi,ecx ; edi=data6 + descale esi,(CONST_BITS+PASS1_BITS) + descale edi,(CONST_BITS+PASS1_BITS) + mov DCTELEM [COL(2,edx,SIZEOF_DCTELEM)], si + mov DCTELEM [COL(6,edx,SIZEOF_DCTELEM)], di + + ; -- Odd part + + mov eax, INT32 [esp] ; eax=tmp4 + mov ebx, INT32 [esp+4] ; ebx=tmp5 + mov ecx, INT32 [esp+12] ; ecx=tmp6 + mov esi, INT32 [esp+16] ; esi=tmp7 + + lea edx,[eax+ecx] ; edx=z3 + lea edi,[ebx+esi] ; edi=z4 + add eax,esi ; eax=z1 + add ebx,ecx ; ebx=z2 + + lea esi,[edx+edi] + imul esi,(F_1_175) ; esi=z5 + + imul edx,(-F_1_961) ; edx=z3(=MULTIPLY(z3,-FIX_1_961570560)) + imul edi,(-F_0_390) ; 
edi=z4(=MULTIPLY(z4,-FIX_0_390180644)) + imul eax,(-F_0_899) ; eax=z1(=MULTIPLY(z1,-FIX_0_899976223)) + imul ebx,(-F_2_562) ; ebx=z2(=MULTIPLY(z2,-FIX_2_562915447)) + + add edx,esi ; edx=z3(=z3+z5) + add edi,esi ; edi=z4(=z4+z5) + + lea ecx,[eax+edx] ; ecx=z1+z3 + lea esi,[ebx+edi] ; esi=z2+z4 + add eax,edi ; eax=z1+z4 + add ebx,edx ; ebx=z2+z3 + + pop edx ; edx=tmp4 + pop edi ; edi=tmp5 + imul edx,(F_0_298) ; edx=tmp4(=MULTIPLY(tmp4,FIX_0_298631336)) + imul edi,(F_2_053) ; edi=tmp5(=MULTIPLY(tmp5,FIX_2_053119869)) + add ecx,edx ; ecx=data7(=tmp4+z1+z3) + add esi,edi ; esi=data5(=tmp5+z2+z4) + pop edx ; dataptr + descale ecx,(CONST_BITS+PASS1_BITS) + descale esi,(CONST_BITS+PASS1_BITS) + mov DCTELEM [COL(7,edx,SIZEOF_DCTELEM)], cx + mov DCTELEM [COL(5,edx,SIZEOF_DCTELEM)], si + + pop edi ; edi=tmp6 + pop ecx ; ecx=tmp7 + imul edi,(F_3_072) ; edi=tmp6(=MULTIPLY(tmp6,FIX_3_072711026)) + imul ecx,(F_1_501) ; ecx=tmp7(=MULTIPLY(tmp7,FIX_1_501321110)) + add ebx,edi ; ebx=data3(=tmp6+z2+z3) + add eax,ecx ; eax=data1(=tmp7+z1+z4) + pop ecx ; ctr + descale ebx,(CONST_BITS+PASS1_BITS) + descale eax,(CONST_BITS+PASS1_BITS) + mov DCTELEM [COL(3,edx,SIZEOF_DCTELEM)], bx + mov DCTELEM [COL(1,edx,SIZEOF_DCTELEM)], ax + + add edx, byte SIZEOF_DCTELEM ; advance pointer to next column + dec ecx + jnz near .columnloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +%endif ; DCT_ISLOW_SUPPORTED diff --git a/jfmmxfst.asm b/jfmmxfst.asm new file mode 100644 index 0000000..2f8d53f --- /dev/null +++ b/jfmmxfst.asm @@ -0,0 +1,404 @@ +; +; jfmmxfst.asm - fast integer FDCT (MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_IFAST_SUPPORTED +%ifdef JFDCT_INT_MMX_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_mmx) + +EXTN(jconst_fdct_ifast_mmx): + +PW_F0707 times 4 dw F_0_707 << CONST_SHIFT +PW_F0382 times 4 dw F_0_382 << CONST_SHIFT +PW_F0541 times 4 dw F_0_541 << CONST_SHIFT +PW_F1306 times 4 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples, in place (rows, then columns). +; +; GLOBAL(void) +; jpeg_fdct_ifast_mmx (DCTELEM * data) +; + +%define data(b) (b)+8 ; DCTELEM * data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jpeg_fdct_ifast_mmx) + +EXTN(jpeg_fdct_ifast_mmx): + push ebp + mov eax,esp ; eax = original ebp (unaligned frame base; points at the saved ebp) + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax ; keep original ebp at [aligned ebp+0] (see original_ebp) + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] ; reserve wk[WK_NUM] scratch below aligned ebp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows.
+ + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(24 34 25 35) + punpckhwd mm5,mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm4,mm7 ; mm4=(02 12 03 13) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(04 14 05 15) + punpckhwd mm2,mm3 ; mm2=(06 16 07 17) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose 
coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + psubw mm5,mm7 ; mm5=tmp13 + psubw mm0,mm4 ; mm0=tmp12 + paddw mm1,mm7 ; mm1=tmp10 + paddw mm6,mm4 ; mm6=tmp11 + + paddw mm0,mm5 + psllw mm0,PRE_MULTIPLY_SCALE_BITS + pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7,mm1 + movq mm4,mm5 + psubw mm1,mm6 ; mm1=data4 + psubw mm5,mm0 ; mm5=data6 + paddw mm7,mm6 ; mm7=data0 + paddw mm4,mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2,mm3 ; mm2=tmp10 + paddw mm3,mm6 ; mm3=tmp11 + paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 + + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm6,PRE_MULTIPLY_SCALE_BITS + + psllw mm3,PRE_MULTIPLY_SCALE_BITS + pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1,mm2 ; mm1=tmp10 + psubw mm2,mm6 + pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1,mm2 ; mm1=z2 + paddw mm6,mm2 ; mm6=z4 + + movq mm5,mm0 + psubw mm0,mm3 ; mm0=z13 + paddw mm5,mm3 ; mm5=z11 + + movq mm7,mm0 + movq mm4,mm5 + psubw mm0,mm1 ; mm0=data3 + psubw mm5,mm6 ; mm5=data7 + paddw mm7,mm1 ; mm7=data5 + paddw mm4,mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD 
[MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(02 03 12 13) + punpckhwd mm4,mm1 ; mm4=(22 23 32 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(42 43 52 53) + punpckhwd mm5,mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 01 10 11) + punpckhwd mm4,mm7 ; mm4=(20 21 30 31) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(40 41 50 51) + punpckhwd mm2,mm3 ; mm2=(60 61 70 71) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 
+ paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + psubw mm5,mm7 ; mm5=tmp13 + psubw mm0,mm4 ; mm0=tmp12 + paddw mm1,mm7 ; mm1=tmp10 + paddw mm6,mm4 ; mm6=tmp11 + + paddw mm0,mm5 + psllw mm0,PRE_MULTIPLY_SCALE_BITS + pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7,mm1 + movq mm4,mm5 + psubw mm1,mm6 ; mm1=data4 + psubw mm5,mm0 ; mm5=data6 + paddw mm7,mm6 ; mm7=data0 + paddw mm4,mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2,mm3 ; mm2=tmp10 + paddw mm3,mm6 ; mm3=tmp11 + paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 + + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm6,PRE_MULTIPLY_SCALE_BITS + + psllw mm3,PRE_MULTIPLY_SCALE_BITS + pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1,mm2 ; mm1=tmp10 + psubw mm2,mm6 + pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1,mm2 ; mm1=z2 + paddw mm6,mm2 ; mm6=z4 + + movq mm5,mm0 + psubw mm0,mm3 ; mm0=z13 + paddw 
mm5,mm3 ; mm5=z11 + + movq mm7,mm0 + movq mm4,mm5 + psubw mm0,mm1 ; mm0=data3 + psubw mm5,mm6 ; mm5=data7 + paddw mm7,mm1 ; mm7=data5 + paddw mm4,mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*SIZEOF_DCTELEM ; advance pointer to next 4 columns + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp (value stored at [aligned ebp] by the prologue) + pop ebp ; restore caller's ebp + ret + +%endif ; JFDCT_INT_MMX_SUPPORTED +%endif ; DCT_IFAST_SUPPORTED diff --git a/jfmmxint.asm b/jfmmxint.asm new file mode 100644 index 0000000..afe47fd --- /dev/null +++ b/jfmmxint.asm @@ -0,0 +1,629 @@ +; +; jfmmxint.asm - accurate integer FDCT (MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_ISLOW_SUPPORTED +%ifdef JFDCT_INT_MMX_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs."
+%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_mmx) + +EXTN(jconst_fdct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 2 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 2 dw 
F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) ; rounding bias added before psrad DESCALE_P1 (pass 1) +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) ; rounding bias added before psrad DESCALE_P2 (pass 2) +PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS-1) ; rounding bias added before psraw PASS1_BITS (pass 2 data0/data4) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples, in place (rows, then columns). +; +; GLOBAL(void) +; jpeg_fdct_islow_mmx (DCTELEM * data) +; + +%define data(b) (b)+8 ; DCTELEM * data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jpeg_fdct_islow_mmx) + +EXTN(jpeg_fdct_islow_mmx): + push ebp + mov eax,esp ; eax = original ebp (unaligned frame base; points at the saved ebp) + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax ; keep original ebp at [aligned ebp+0] (see original_ebp) + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] ; reserve wk[WK_NUM] scratch below aligned ebp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows.
+ + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(24 34 25 35) + punpckhwd mm5,mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm4,mm7 ; mm4=(02 12 03 13) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(04 14 05 15) + punpckhwd mm2,mm3 ; mm2=(06 16 07 17) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose 
coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + paddw mm5,mm7 ; mm5=tmp10 + paddw mm0,mm4 ; mm0=tmp11 + psubw mm1,mm7 ; mm1=tmp13 + psubw mm6,mm4 ; mm6=tmp12 + + movq mm7,mm5 + paddw mm5,mm0 ; mm5=tmp10+tmp11 + psubw mm7,mm0 ; mm7=tmp10-tmp11 + + psllw mm5,PASS1_BITS ; mm5=data0 + psllw mm7,PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4,mm1 ; mm1=tmp13 + movq mm0,mm1 + punpcklwd mm4,mm6 ; mm6=tmp12 + punpckhwd mm0,mm6 + movq mm1,mm4 + movq mm6,mm0 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm0,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4,DESCALE_P1 + psrad mm0,DESCALE_P1 + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm6,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm4,mm0 ; mm4=data2 + packssdw mm1,mm6 ; mm1=data6 + + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; 
mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0,mm2 ; mm2=tmp4 + movq mm6,mm3 ; mm3=tmp5 + paddw mm0,mm5 ; mm0=z3 + paddw mm6,mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4,mm0 + movq mm1,mm0 + punpcklwd mm4,mm6 + punpckhwd mm1,mm6 + movq mm0,mm4 + movq mm6,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4,mm2 + movq mm1,mm2 + punpcklwd mm4,mm7 + punpckhwd mm1,mm7 + movq mm2,mm4 + movq mm7,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd mm1, MMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; 
mm7=data1H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4,DESCALE_P1 + psrad mm1,DESCALE_P1 + paddd mm2,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm2,DESCALE_P1 + psrad mm7,DESCALE_P1 + + packssdw mm4,mm1 ; mm4=data7 + packssdw mm2,mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1,mm3 + movq mm7,mm3 + punpcklwd mm1,mm5 + punpckhwd mm7,mm5 + movq mm3,mm1 + movq mm5,mm7 + pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1,DESCALE_P1 + psrad mm7,DESCALE_P1 + paddd mm3,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm5,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm3,DESCALE_P1 + psrad mm5,DESCALE_P1 + + packssdw mm1,mm7 ; mm1=data5 + packssdw mm3,mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. 
+ + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16,7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm1 ; mm0=(02 03 12 13) + punpckhwd mm4,mm1 ; mm4=(22 23 32 33) + movq mm5,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm3 ; mm2=(42 43 52 53) + punpckhwd mm5,mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 01 10 11) + punpckhwd mm4,mm7 ; mm4=(20 21 30 31) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm3 ; mm1=(40 41 50 51) + punpckhwd mm2,mm3 ; mm2=(60 61 70 71) + + movq mm7,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 + movq mm3,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0,mm7 + movq mm5,mm6 + psubw mm7,mm2 ; mm7=data1-data6=tmp6 + psubw mm6,mm3 ; mm6=data0-data7=tmp7 + paddw mm0,mm2 ; mm0=data1+data6=tmp1 + paddw mm5,mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7,mm4 ; transpose 
coefficients(phase 2) + punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2,mm7 + movq mm3,mm4 + paddw mm7,mm1 ; mm7=data3+data4=tmp3 + paddw mm4,mm6 ; mm4=data2+data5=tmp2 + psubw mm2,mm1 ; mm2=data3-data4=tmp4 + psubw mm3,mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1,mm5 + movq mm6,mm0 + paddw mm5,mm7 ; mm5=tmp10 + paddw mm0,mm4 ; mm0=tmp11 + psubw mm1,mm7 ; mm1=tmp13 + psubw mm6,mm4 ; mm6=tmp12 + + movq mm7,mm5 + paddw mm5,mm0 ; mm5=tmp10+tmp11 + psubw mm7,mm0 ; mm7=tmp10-tmp11 + + paddw mm5,[GOTOFF(ebx,PW_DESCALE_P2X)] + paddw mm7,[GOTOFF(ebx,PW_DESCALE_P2X)] + psraw mm5,PASS1_BITS ; mm5=data0 + psraw mm7,PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4,mm1 ; mm1=tmp13 + movq mm0,mm1 + punpcklwd mm4,mm6 ; mm6=tmp12 + punpckhwd mm0,mm6 + movq mm1,mm4 + movq mm6,mm0 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm0,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4,DESCALE_P2 + psrad mm0,DESCALE_P2 + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm6,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1,DESCALE_P2 + psrad mm6,DESCALE_P2 + + packssdw mm4,mm0 ; mm4=data2 + packssdw mm1,mm6 ; mm1=data6 + + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD 
[MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0,mm2 ; mm2=tmp4 + movq mm6,mm3 ; mm3=tmp5 + paddw mm0,mm5 ; mm0=z3 + paddw mm6,mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4,mm0 + movq mm1,mm0 + punpcklwd mm4,mm6 + punpckhwd mm1,mm6 + movq mm0,mm4 + movq mm6,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4,mm2 + movq mm1,mm2 + punpcklwd mm4,mm7 + punpckhwd mm1,mm7 + movq mm2,mm4 + movq mm7,mm1 + pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd 
mm1, MMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; mm7=data1H + + paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4,DESCALE_P2 + psrad mm1,DESCALE_P2 + paddd mm2,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm2,DESCALE_P2 + psrad mm7,DESCALE_P2 + + packssdw mm4,mm1 ; mm4=data7 + packssdw mm2,mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1,mm3 + movq mm7,mm3 + punpcklwd mm1,mm5 + punpckhwd mm7,mm5 + movq mm3,mm1 + movq mm5,mm7 + pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1,DESCALE_P2 + psrad mm7,DESCALE_P2 + paddd mm3,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm3,DESCALE_P2 + psrad mm5,DESCALE_P2 + + packssdw mm1,mm7 ; mm1=data5 + packssdw mm3,mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*SIZEOF_DCTELEM + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JFDCT_INT_MMX_SUPPORTED +%endif ; DCT_ISLOW_SUPPORTED diff --git a/jfss2fst.asm b/jfss2fst.asm new file mode 100644 index 0000000..567bcef --- /dev/null +++ b/jfss2fst.asm @@ -0,0 +1,411 @@ +; +; jfss2fst.asm - fast integer FDCT (SSE2) +; +; x86 SIMD extension for IJG JPEG 
library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_IFAST_SUPPORTED +%ifdef JFDCT_INT_SSE2_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_fdct_ifast_sse2) + +EXTN(jconst_fdct_ifast_sse2): + +PW_F0707 times 8 dw F_0_707 << CONST_SHIFT +PW_F0382 times 8 dw F_0_382 << CONST_SHIFT +PW_F0541 times 8 dw F_0_541 << CONST_SHIFT +PW_F1306 times 8 dw F_1_306 << CONST_SHIFT + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jpeg_fdct_ifast_sse2 (DCTELEM * data) +; + +%define data(b) (b)+8 ; DCTELEM * data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jpeg_fdct_ifast_sse2) + +EXTN(jpeg_fdct_ifast_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. 
+ + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67) + ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)]
; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + psubw xmm3,xmm1 ; xmm3=tmp13 + psubw xmm6,xmm7 ; xmm6=tmp12 + paddw xmm4,xmm1 ; xmm4=tmp10 + paddw 
xmm0,xmm7 ; xmm0=tmp11 + + paddw xmm6,xmm3 + psllw xmm6,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1 + + movdqa xmm1,xmm4 + movdqa xmm7,xmm3 + psubw xmm4,xmm0 ; xmm4=data4 + psubw xmm3,xmm6 ; xmm3=data6 + paddw xmm1,xmm0 ; xmm1=data0 + paddw xmm7,xmm6 ; xmm7=data2 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 + + ; -- Odd part + + paddw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm5,xmm0 ; xmm5=tmp11 + paddw xmm0,xmm6 ; xmm0=tmp12, xmm6=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3 + + movdqa xmm4,xmm2 ; xmm4=tmp10 + psubw xmm2,xmm0 + pmulhw xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5 + pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm2 ; xmm4=z2 + paddw xmm0,xmm2 ; xmm0=z4 + + movdqa xmm3,xmm6 + psubw xmm6,xmm5 ; xmm6=z13 + paddw xmm3,xmm5 ; xmm3=z11 + + movdqa xmm2,xmm6 + movdqa xmm5,xmm3 + psubw xmm6,xmm4 ; xmm6=data3 + psubw xmm3,xmm0 ; xmm3=data7 + paddw xmm2,xmm4 ; xmm2=data5 + paddw xmm5,xmm0 ; xmm5=data1 + + ; ---- Pass 2: process columns. 
+ +; mov edx, POINTER [data(eax)] ; (DCTELEM *) + + ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) + ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 10 11 20 21 30 31) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 50 51 60 61 70 71) + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm6 ; xmm7=(02 03 12 13 22 23 32 33) + punpckhwd xmm0,xmm6 ; xmm0=(42 43 52 53 62 63 72 73) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 + + ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) + ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm7,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm2 ; xmm5=(04 05 14 15 24 25 34 35) + punpckhwd xmm7,xmm2 ; xmm7=(44 45 54 55 64 65 74 75) + movdqa xmm0,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm3 ; xmm6=(06 07 16 17 26 27 36 37) + punpckhwd xmm0,xmm3 ; xmm0=(46 47 56 57 66 67 76 77) + + movdqa xmm2,xmm5 ; transpose coefficients(phase 2) + punpckldq xmm5,xmm6 ; xmm5=(04 05 06 07 14 15 16 17) + punpckhdq xmm2,xmm6 ; xmm2=(24 25 26 27 34 35 36 37) + movdqa xmm3,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm0 ; xmm7=(44 45 46 47 54 55 56 57) + punpckhdq xmm3,xmm0 ; xmm3=(64 65 66 67 74 75 76 77) + + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) + + movdqa xmm2,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 10 11 12 13) + punpckhdq xmm2,xmm6 ; xmm2=(20 21 22 23 30 31 32 33) + movdqa xmm7,xmm4 ; transpose coefficients(phase 
2) + punpckldq xmm4,xmm0 ; xmm4=(40 41 42 43 50 51 52 53) + punpckhdq xmm7,xmm0 ; xmm7=(60 61 62 63 70 71 72 73) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm6,xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm0,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm0,xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm5,xmm6 + movdqa xmm3,xmm1 + psubw xmm6,xmm7 ; xmm6=data1-data6=tmp6 + psubw xmm1,xmm0 ; xmm1=data0-data7=tmp7 + paddw xmm5,xmm7 ; xmm5=data1+data6=tmp1 + paddw xmm3,xmm0 ; xmm3=data0+data7=tmp0 + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 + + movdqa xmm6,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm6,xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm1,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm1,xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm7,xmm6 + movdqa xmm0,xmm2 + paddw xmm6,xmm4 ; xmm6=data3+data4=tmp3 + paddw xmm2,xmm1 ; xmm2=data2+data5=tmp2 + psubw xmm7,xmm4 ; xmm7=data3-data4=tmp4 + psubw xmm0,xmm1 ; xmm0=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm1,xmm5 + psubw xmm3,xmm6 ; xmm3=tmp13 + psubw xmm5,xmm2 ; xmm5=tmp12 + paddw xmm4,xmm6 ; xmm4=tmp10 + paddw xmm1,xmm2 ; xmm1=tmp11 + + paddw xmm5,xmm3 + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1 + + movdqa xmm6,xmm4 + movdqa xmm2,xmm3 + psubw xmm4,xmm1 ; xmm4=data4 + psubw xmm3,xmm5 ; xmm3=data6 + paddw xmm6,xmm1 ; xmm6=data0 + paddw xmm2,xmm5 ; xmm2=data2 + + movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD 
[XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2 + + ; -- Odd part + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + paddw xmm7,xmm0 ; xmm7=tmp10 + paddw xmm0,xmm1 ; xmm0=tmp11 + paddw xmm1,xmm5 ; xmm1=tmp12, xmm5=tmp7 + + psllw xmm7,PRE_MULTIPLY_SCALE_BITS + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3 + + movdqa xmm4,xmm7 ; xmm4=tmp10 + psubw xmm7,xmm1 + pmulhw xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5 + pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4,xmm7 ; xmm4=z2 + paddw xmm1,xmm7 ; xmm1=z4 + + movdqa xmm3,xmm5 + psubw xmm5,xmm0 ; xmm5=z13 + paddw xmm3,xmm0 ; xmm3=z11 + + movdqa xmm6,xmm5 + movdqa xmm2,xmm3 + psubw xmm5,xmm4 ; xmm5=data3 + psubw xmm3,xmm1 ; xmm3=data7 + paddw xmm6,xmm4 ; xmm6=data5 + paddw xmm2,xmm1 ; xmm2=data1 + + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5 + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JFDCT_INT_SSE2_SUPPORTED +%endif ; DCT_IFAST_SUPPORTED diff --git a/jfss2int.asm b/jfss2int.asm new file mode 100644 index 0000000..106b42c --- /dev/null +++ b/jfss2int.asm @@ -0,0 +1,641 @@ +; +; jfss2int.asm - accurate integer FDCT (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_ISLOW_SUPPORTED +%ifdef JFDCT_INT_SSE2_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_islow_sse2) + +EXTN(jconst_fdct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS-1) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. 
+; +; GLOBAL(void) +; jpeg_fdct_islow_sse2 (DCTELEM * data) +; + +%define data(b) (b)+8 ; DCTELEM * data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 6 + + align 16 + global EXTN(jpeg_fdct_islow_sse2) + +EXTN(jpeg_fdct_islow_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) + ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; 
xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) + + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6,xmm1 + movdqa xmm3,xmm0 + psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1,xmm7 ; transpose coefficients(phase 
3) + punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2,xmm1 + movdqa xmm5,xmm7 + paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4,xmm3 + movdqa xmm0,xmm6 + paddw xmm3,xmm1 ; xmm3=tmp10 + paddw xmm6,xmm7 ; xmm6=tmp11 + psubw xmm4,xmm1 ; xmm4=tmp13 + psubw xmm0,xmm7 ; xmm0=tmp12 + + movdqa xmm1,xmm3 + paddw xmm3,xmm6 ; xmm3=tmp10+tmp11 + psubw xmm1,xmm6 ; xmm1=tmp10-tmp11 + + psllw xmm3,PASS1_BITS ; xmm3=data0 + psllw xmm1,PASS1_BITS ; xmm1=data4 + + movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 + movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm7,xmm4 ; xmm4=tmp13 + movdqa xmm6,xmm4 + punpcklwd xmm7,xmm0 ; xmm0=tmp12 + punpckhwd xmm6,xmm0 + movdqa xmm4,xmm7 + movdqa xmm0,xmm6 + pmaddwd xmm7,[GOTOFF(ebx,PW_F130_F054)] ; xmm7=data2L + pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=data2H + pmaddwd xmm4,[GOTOFF(ebx,PW_F054_MF130)] ; xmm4=data6L + pmaddwd xmm0,[GOTOFF(ebx,PW_F054_MF130)] ; xmm0=data6H + + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7,DESCALE_P1 + psrad xmm6,DESCALE_P1 + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm7,xmm6 ; xmm7=data2 + packssdw xmm4,xmm0 ; xmm4=data6 + + movdqa XMMWORD [wk(4)], 
xmm7 ; wk(4)=data2 + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 + + ; -- Odd part + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 + + movdqa xmm6,xmm2 ; xmm2=tmp4 + movdqa xmm0,xmm5 ; xmm5=tmp5 + paddw xmm6,xmm3 ; xmm6=z3 + paddw xmm0,xmm1 ; xmm0=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm7,xmm6 + movdqa xmm4,xmm6 + punpcklwd xmm7,xmm0 + punpckhwd xmm4,xmm0 + movdqa xmm6,xmm7 + movdqa xmm0,xmm4 + pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3H + pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4L + pmaddwd xmm0,[GOTOFF(ebx,PW_F117_F078)] ; xmm0=z4H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm7,xmm2 + movdqa xmm4,xmm2 + punpcklwd xmm7,xmm1 + punpckhwd xmm4,xmm1 + movdqa xmm2,xmm7 + movdqa xmm1,xmm4 + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp4L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4H + pmaddwd xmm2,[GOTOFF(ebx,PW_MF089_F060)] ; 
xmm2=tmp7L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp7H + + paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L + paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H + paddd xmm2,xmm6 ; xmm2=data1L + paddd xmm1,xmm0 ; xmm1=data1H + + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm2,DESCALE_P1 + psrad xmm1,DESCALE_P1 + + packssdw xmm7,xmm4 ; xmm7=data7 + packssdw xmm2,xmm1 ; xmm2=data1 + + movdqa xmm4,xmm5 + movdqa xmm1,xmm5 + punpcklwd xmm4,xmm3 + punpckhwd xmm1,xmm3 + movdqa xmm5,xmm4 + movdqa xmm3,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm4=tmp5L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5H + pmaddwd xmm5,[GOTOFF(ebx,PW_MF256_F050)] ; xmm5=tmp6L + pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6H + + paddd xmm4,xmm6 ; xmm4=data5L + paddd xmm1,xmm0 ; xmm1=data5H + paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L + paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4,DESCALE_P1 + psrad xmm1,DESCALE_P1 + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm5,DESCALE_P1 + psrad xmm3,DESCALE_P1 + + packssdw xmm4,xmm1 ; xmm4=data5 + packssdw xmm5,xmm3 ; xmm5=data3 + + ; ---- Pass 2: process columns. 
+ +; mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 + movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 + + ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) + ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) + + movdqa xmm1,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm2 ; xmm6=(00 01 10 11 20 21 30 31) + punpckhwd xmm1,xmm2 ; xmm1=(40 41 50 51 60 61 70 71) + movdqa xmm3,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(02 03 12 13 22 23 32 33) + punpckhwd xmm3,xmm5 ; xmm3=(42 43 52 53 62 63 72 73) + + movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 + movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 + + ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) + ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm0,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm4 ; xmm2=(04 05 14 15 24 25 34 35) + punpckhwd xmm0,xmm4 ; xmm0=(44 45 54 55 64 65 74 75) + movdqa xmm3,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm7 ; xmm5=(06 07 16 17 26 27 36 37) + punpckhwd xmm3,xmm7 ; xmm3=(46 47 56 57 66 67 76 77) + + movdqa xmm4,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm5 ; xmm2=(04 05 06 07 14 15 16 17) + punpckhdq xmm4,xmm5 ; xmm4=(24 25 26 27 34 35 36 37) + movdqa xmm7,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(44 45 46 47 54 55 56 57) + punpckhdq xmm7,xmm3 ; xmm7=(64 65 66 67 74 75 76 77) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm5 ; xmm6=(00 01 02 03 10 11 12 13) + punpckhdq 
xmm4,xmm5 ; xmm4=(20 21 22 23 30 31 32 33) + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm3 ; xmm1=(40 41 42 43 50 51 52 53) + punpckhdq xmm0,xmm3 ; xmm0=(60 61 62 63 70 71 72 73) + + movdqa xmm5,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm5,xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm3,xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm2,xmm5 + movdqa xmm7,xmm6 + psubw xmm5,xmm0 ; xmm5=data1-data6=tmp6 + psubw xmm6,xmm3 ; xmm6=data0-data7=tmp7 + paddw xmm2,xmm0 ; xmm2=data1+data6=tmp1 + paddw xmm7,xmm3 ; xmm7=data0+data7=tmp0 + + movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) + movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movdqa xmm5,xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4,xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm5,xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm6,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm6,xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm0,xmm5 + movdqa xmm3,xmm4 + paddw xmm5,xmm1 ; xmm5=data3+data4=tmp3 + paddw xmm4,xmm6 ; xmm4=data2+data5=tmp2 + psubw xmm0,xmm1 ; xmm0=data3-data4=tmp4 + psubw xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm1,xmm7 + movdqa xmm6,xmm2 + paddw xmm7,xmm5 ; xmm7=tmp10 + paddw xmm2,xmm4 ; xmm2=tmp11 + psubw xmm1,xmm5 ; xmm1=tmp13 + psubw xmm6,xmm4 ; xmm6=tmp12 + + movdqa xmm5,xmm7 + paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 + psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 + + paddw xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)] + paddw xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)] + psraw xmm7,PASS1_BITS ; xmm7=data0 + psraw xmm5,PASS1_BITS ; xmm5=data4 + + 
movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7 + movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm4,xmm1 ; xmm1=tmp13 + movdqa xmm2,xmm1 + punpcklwd xmm4,xmm6 ; xmm6=tmp12 + punpckhwd xmm2,xmm6 + movdqa xmm1,xmm4 + movdqa xmm6,xmm2 + pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=data2L + pmaddwd xmm2,[GOTOFF(ebx,PW_F130_F054)] ; xmm2=data2H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=data6L + pmaddwd xmm6,[GOTOFF(ebx,PW_F054_MF130)] ; xmm6=data6H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1,DESCALE_P2 + psrad xmm6,DESCALE_P2 + + packssdw xmm4,xmm2 ; xmm4=data2 + packssdw xmm1,xmm6 ; xmm1=data6 + + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1 + + ; -- Odd part + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + movdqa xmm2,xmm0 ; xmm0=tmp4 + movdqa xmm6,xmm3 ; xmm3=tmp5 + paddw xmm2,xmm7 ; xmm2=z3 + paddw xmm6,xmm5 ; xmm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm4,xmm2 + movdqa xmm1,xmm2 + punpcklwd xmm4,xmm6 + punpckhwd xmm1,xmm6 + movdqa xmm2,xmm4 + movdqa xmm6,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF078_F117)] ; xmm1=z3H + pmaddwd 
xmm2,[GOTOFF(ebx,PW_F117_F078)] ; xmm2=z4L + pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm4,xmm0 + movdqa xmm1,xmm0 + punpcklwd xmm4,xmm5 + punpckhwd xmm1,xmm5 + movdqa xmm0,xmm4 + movdqa xmm5,xmm1 + pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4L + pmaddwd xmm1,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm1=tmp4H + pmaddwd xmm0,[GOTOFF(ebx,PW_MF089_F060)] ; xmm0=tmp7L + pmaddwd xmm5,[GOTOFF(ebx,PW_MF089_F060)] ; xmm5=tmp7H + + paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L + paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H + paddd xmm0,xmm2 ; xmm0=data1L + paddd xmm5,xmm6 ; xmm5=data1H + + paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm0,DESCALE_P2 + psrad xmm5,DESCALE_P2 + + packssdw xmm4,xmm1 ; xmm4=data7 + packssdw xmm0,xmm5 ; xmm0=data1 + + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0 + + movdqa xmm1,xmm3 + movdqa xmm5,xmm3 + punpcklwd xmm1,xmm7 + punpckhwd xmm5,xmm7 + movdqa xmm3,xmm1 + movdqa xmm7,xmm5 + pmaddwd 
xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5L + pmaddwd xmm5,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm5=tmp5H + pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF256_F050)] ; xmm7=tmp6H + + paddd xmm1,xmm2 ; xmm1=data5L + paddd xmm5,xmm6 ; xmm5=data5H + paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L + paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H + + paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1,DESCALE_P2 + psrad xmm5,DESCALE_P2 + paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm3,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm1,xmm5 ; xmm1=data5 + packssdw xmm3,xmm7 ; xmm3=data3 + + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JFDCT_INT_SSE2_SUPPORTED +%endif ; DCT_ISLOW_SUPPORTED diff --git a/jfsseflt.asm b/jfsseflt.asm new file mode 100644 index 0000000..98b0973 --- /dev/null +++ b/jfsseflt.asm @@ -0,0 +1,383 @@ +; +; jfsseflt.asm - floating-point FDCT (SSE) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. 
+; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_FLOAT_SUPPORTED +%ifdef JFDCT_FLT_SSE_MMX_SUPPORTED +%define JFDCT_FLT_SSE_SUPPORTED +%endif +%ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED +%define JFDCT_FLT_SSE_SUPPORTED +%endif +%ifdef JFDCT_FLT_SSE_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_fdct_float_sse) + +EXTN(jconst_fdct_float_sse): + +PD_0_382 times 4 dd 0.382683432365089771728460 ; cos(PI*3/8) +PD_0_707 times 4 dd 0.707106781186547524400844 ; cos(PI*1/4) +PD_0_541 times 4 dd 0.541196100146196984399723 ; cos(PI*1/8)-cos(PI*3/8) +PD_1_306 times 4 dd 1.306562964876376527856643 ; cos(PI*1/8)+cos(PI*3/8) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples, in place (pass 1 transforms the rows, pass 2 the columns). +; +; GLOBAL(void) +; jpeg_fdct_float_sse (FAST_FLOAT * data) +; + +%define data(b) (b)+8 ; FAST_FLOAT * data + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jpeg_fdct_float_sse) + +EXTN(jpeg_fdct_float_sse): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 ; room for the saved original ebp + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax ; [aligned ebp] = original ebp (reloaded by 'pop esp' in the epilogue) + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] ; reserve wk[WK_NUM] below the aligned ebp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. 
+ + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 ; ctr: 4 rows per iteration + alignx 16,7 +.rowloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) + ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(20 30 21 31) + unpckhps xmm4,xmm1 ; xmm4=(22 32 23 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(24 34 25 35) + unpckhps xmm5,xmm3 ; xmm5=(26 36 27 37) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) + ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm4,xmm7 ; xmm4=(02 12 03 13) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(04 14 05 15) + unpckhps xmm2,xmm3 ; xmm2=(06 16 07 17) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 10 20 30)=data0 + unpckhps2 xmm7,xmm0 ; xmm7=(01 11 21 31)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(06 16 26 36)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(07 17 27 37)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) + movaps 
xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(02 12 22 32)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(03 13 23 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(04 14 24 34)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(05 15 25 35)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 ; xmm0=tmp12+tmp13 + mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 ; xmm2=tmp10-tmp12 + mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; 
xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; advance to the next 4 rows + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 ; ctr: 4 columns per iteration + alignx 16,7 +.columnloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) + ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) + + movaps xmm4,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm1 ; xmm0=(02 03 12 13) + unpckhps xmm4,xmm1 ; xmm4=(22 23 32 33) + movaps xmm5,xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2,xmm3 ; xmm2=(42 43 52 53) + unpckhps xmm5,xmm3 ; xmm5=(62 63 72 73) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) + ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) + + movaps xmm4,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 01 10 11) + unpckhps xmm4,xmm7 ; xmm4=(20 21 30 31) + movaps xmm2,xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1,xmm3 ; xmm1=(40 41 50 51) + unpckhps xmm2,xmm3 ; xmm2=(60 61 70 71) + + movaps xmm7,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm0 ; xmm6=(00 01 02 03)=data0 + unpckhps2 
xmm7,xmm0 ; xmm7=(10 11 12 13)=data1 + movaps xmm3,xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2,xmm5 ; xmm2=(60 61 62 63)=data6 + unpckhps2 xmm3,xmm5 ; xmm3=(70 71 72 73)=data7 + + movaps xmm0,xmm7 + movaps xmm5,xmm6 + subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(20 21 22 23)=data2 + unpckhps2 xmm7,xmm2 ; xmm7=(30 31 32 33)=data3 + movaps xmm6,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm3 ; xmm1=(40 41 42 43)=data4 + unpckhps2 xmm6,xmm3 ; xmm6=(50 51 52 53)=data5 + + movaps xmm2,xmm7 + movaps xmm3,xmm4 + addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1,xmm5 + movaps xmm6,xmm0 + subps xmm5,xmm7 ; xmm5=tmp13 + subps xmm0,xmm4 ; xmm0=tmp12 + addps xmm1,xmm7 ; xmm1=tmp10 + addps xmm6,xmm4 ; xmm6=tmp11 + + addps xmm0,xmm5 ; xmm0=tmp12+tmp13 + mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7,xmm1 + movaps xmm4,xmm5 + subps xmm1,xmm6 ; xmm1=data4 + subps xmm5,xmm0 ; xmm5=data6 + addps xmm7,xmm6 ; xmm7=data0 + addps xmm4,xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2,xmm3 ; xmm2=tmp10 + addps xmm3,xmm6 ; xmm3=tmp11 + addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps 
xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1,xmm2 ; xmm1=tmp10 + subps xmm2,xmm6 ; xmm2=tmp10-tmp12 + mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1,xmm2 ; xmm1=z2 + addps xmm6,xmm2 ; xmm6=z4 + + movaps xmm5,xmm0 + subps xmm0,xmm3 ; xmm0=z13 + addps xmm5,xmm3 ; xmm5=z11 + + movaps xmm7,xmm0 + movaps xmm4,xmm5 + subps xmm0,xmm1 ; xmm0=data3 + subps xmm5,xmm6 ; xmm5=data7 + addps xmm7,xmm1 ; xmm7=data5 + addps xmm4,xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, byte 4*SIZEOF_FAST_FLOAT ; advance to the next 4 columns + dec ecx + jnz near .columnloop + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JFDCT_FLT_SSE_SUPPORTED +%endif ; DCT_FLOAT_SUPPORTED diff --git a/ji3dnflt.asm b/ji3dnflt.asm new file mode 100644 index 0000000..9c31e99 --- /dev/null +++ b/ji3dnflt.asm @@ -0,0 +1,462 @@ +; +; ji3dnflt.asm - floating-point IDCT (3DNow! & MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). 
The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_FLOAT_SUPPORTED +%ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_3dnow) + +EXTN(jconst_idct_float_3dnow): + +PD_1_414 times 2 dd 1.414213562373095048801689 ; 2*cos(PI*1/4) +PD_1_847 times 2 dd 1.847759065022573512256366 ; 2*cos(PI*1/8) +PD_1_082 times 2 dd 1.082392200292393968799446 ; 2*(cos(PI*1/8)-cos(PI*3/8)) +PD_2_613 times 2 dd 2.613125929752753055713286 ; 2*(cos(PI*1/8)+cos(PI*3/8)) +PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jpeg_idct_float_3dnow (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_float_3dnow) + +EXTN(jpeg_idct_float_3dnow): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 ; room for the saved original ebp + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax ; [aligned ebp] = original ebp (reloaded by 'pop esp' in the epilogue) + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] ; reserve wk[WK_NUM] and workspace[] below the aligned ebp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ +; mov eax, [original_ebp] + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT * wsptr + mov ecx, DCTSIZE/2 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + pushpic ebx ; save GOT address + mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + or eax,ebx + poppic ebx ; restore GOT address + jnz short .columnDCT + + ; -- AC terms all zero + + movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm0,mm0 + psrad mm0,(DWORD_BIT-WORD_BIT) ; sign-extend each coefficient to a dword + pi2fd mm0,mm0 ; convert to float + + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=low element duplicated + punpckhdq mm1,mm1 ; mm1=high element duplicated + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm0,mm0 + punpcklwd mm1,mm1 + psrad mm0,(DWORD_BIT-WORD_BIT) + psrad mm1,(DWORD_BIT-WORD_BIT) + pi2fd mm0,mm0 + pi2fd mm1,mm1 + + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm1, MMWORD 
[MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm2,mm2 + punpcklwd mm3,mm3 + psrad mm2,(DWORD_BIT-WORD_BIT) + psrad mm3,(DWORD_BIT-WORD_BIT) + pi2fd mm2,mm2 + pi2fd mm3,mm3 + + pfmul mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 + pfsub mm0,mm2 ; mm0=tmp11 + pfsub mm1,mm3 + pfadd mm4,mm2 ; mm4=tmp10 + pfadd mm5,mm3 ; mm5=tmp13 + + pfmul mm1,[GOTOFF(ebx,PD_1_414)] + pfsub mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm5 ; mm4=tmp3 + pfsub mm0,mm1 ; mm0=tmp2 + pfadd mm6,mm5 ; mm6=tmp0 + pfadd mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; tmp3 + movq MMWORD [wk(0)], mm0 ; tmp2 + + ; -- Odd part + + movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpcklwd mm2,mm2 + punpcklwd mm3,mm3 + psrad mm2,(DWORD_BIT-WORD_BIT) + psrad mm3,(DWORD_BIT-WORD_BIT) + pi2fd mm2,mm2 + pi2fd mm3,mm3 + + pfmul mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm5,mm5 + punpcklwd mm1,mm1 + psrad mm5,(DWORD_BIT-WORD_BIT) + psrad mm1,(DWORD_BIT-WORD_BIT) + pi2fd mm5,mm5 + pi2fd mm1,mm1 + + pfmul mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + pfmul mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 + pfadd mm2,mm1 ; mm2=z11 + pfadd mm5,mm3 ; mm5=z13 + pfsub mm4,mm1 ; mm4=z12 + pfsub mm0,mm3 ; mm0=z10 + + movq mm1,mm2 + pfsub mm2,mm5 + pfadd mm1,mm5 ; mm1=tmp7 + + pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 + + movq mm3,mm0 + pfadd mm0,mm4 + pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 + pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) + pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) + pfsubr mm3,mm0 ; mm3=tmp12 + pfsub mm4,mm0 ; mm4=tmp10 + + ; -- Final output stage + + 
pfsub mm3,mm1 ; mm3=tmp6 + movq mm5,mm6 + movq mm0,mm7 + pfadd mm6,mm1 ; mm6=data0=(00 01) + pfadd mm7,mm3 ; mm7=data1=(10 11) + pfsub mm5,mm1 ; mm5=data7=(70 71) + pfsub mm0,mm3 ; mm0=data6=(60 61) + pfsub mm2,mm3 ; mm2=tmp5 + + movq mm1,mm6 ; transpose coefficients + punpckldq mm6,mm7 ; mm6=(00 10) + punpckhdq mm1,mm7 ; mm1=(01 11) + movq mm3,mm0 ; transpose coefficients + punpckldq mm0,mm5 ; mm0=(60 70) + punpckhdq mm3,mm5 ; mm3=(61 71) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3 + + movq mm7, MMWORD [wk(0)] ; mm7=tmp2 + movq mm5, MMWORD [wk(1)] ; mm5=tmp3 + + pfadd mm4,mm2 ; mm4=tmp4 + movq mm6,mm7 + movq mm1,mm5 + pfadd mm7,mm2 ; mm7=data2=(20 21) + pfadd mm5,mm4 ; mm5=data4=(40 41) + pfsub mm6,mm2 ; mm6=data5=(50 51) + pfsub mm1,mm4 ; mm1=data3=(30 31) + + movq mm0,mm7 ; transpose coefficients + punpckldq mm7,mm1 ; mm7=(20 30) + punpckhdq mm0,mm1 ; mm0=(21 31) + movq mm3,mm5 ; transpose coefficients + punpckldq mm5,mm6 ; mm5=(40 50) + punpckhdq mm3,mm6 ; mm3=(41 51) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3 + +.nextcolumn: + add esi, byte 2*SIZEOF_JCOEF ; coef_block + add edx, byte 2*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov eax, [original_ebp] ; reload: eax was used as scratch by the zero-column test in pass 1 + lea esi, [workspace] ; FAST_FLOAT * wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] ; eax=output_col + mov ecx, DCTSIZE/2 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movq mm4,mm0 + movq mm5,mm1 + pfsub mm0,mm2 ; mm0=tmp11 + pfsub mm1,mm3 + pfadd mm4,mm2 ; mm4=tmp10 + pfadd mm5,mm3 ; mm5=tmp13 + + pfmul mm1,[GOTOFF(ebx,PD_1_414)] + pfsub mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + pfsub mm4,mm5 ; mm4=tmp3 + pfsub mm0,mm1 ; mm0=tmp2 + pfadd mm6,mm5 ; mm6=tmp0 + pfadd mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; tmp3 + movq MMWORD [wk(0)], mm0 ; tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movq mm4,mm2 + movq mm0,mm5 + pfadd mm2,mm1 ; mm2=z11 + pfadd mm5,mm3 ; mm5=z13 + pfsub mm4,mm1 ; mm4=z12 + pfsub mm0,mm3 ; mm0=z10 + + movq mm1,mm2 + pfsub mm2,mm5 + pfadd mm1,mm5 ; mm1=tmp7 + + pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 + + movq mm3,mm0 + pfadd mm0,mm4 + pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 + pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) + pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) + pfsubr mm3,mm0 ; mm3=tmp12 + pfsub mm4,mm0 ; mm4=tmp10 + + ; -- Final output stage + + pfsub mm3,mm1 ; mm3=tmp6 + movq mm5,mm6 + movq mm0,mm7 + pfadd mm6,mm1 ; mm6=data0=(00 10) + pfadd mm7,mm3 ; mm7=data1=(01 11) + pfsub mm5,mm1 ; mm5=data7=(07 17) + pfsub mm0,mm3 ; mm0=data6=(06 16) + pfsub mm2,mm3 ; mm2=tmp5 + + movq mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm1=[PD_RNDINT_MAGIC] + pcmpeqd mm3,mm3 + psrld mm3,WORD_BIT ; mm3={0xFFFF 
0x0000 0xFFFF 0x0000} + + pfadd mm6,mm1 ; mm6=roundint(data0/8)=(00 ** 10 **) + pfadd mm7,mm1 ; mm7=roundint(data1/8)=(01 ** 11 **) + pfadd mm0,mm1 ; mm0=roundint(data6/8)=(06 ** 16 **) + pfadd mm5,mm1 ; mm5=roundint(data7/8)=(07 ** 17 **) + + pand mm6,mm3 ; mm6=(00 -- 10 --) + pslld mm7,WORD_BIT ; mm7=(-- 01 -- 11) + pand mm0,mm3 ; mm0=(06 -- 16 --) + pslld mm5,WORD_BIT ; mm5=(-- 07 -- 17) + por mm6,mm7 ; mm6=(00 01 10 11) + por mm0,mm5 ; mm0=(06 07 16 17) + + movq mm1, MMWORD [wk(0)] ; mm1=tmp2 + movq mm3, MMWORD [wk(1)] ; mm3=tmp3 + + pfadd mm4,mm2 ; mm4=tmp4 + movq mm7,mm1 + movq mm5,mm3 + pfadd mm1,mm2 ; mm1=data2=(02 12) + pfadd mm3,mm4 ; mm3=data4=(04 14) + pfsub mm7,mm2 ; mm7=data5=(05 15) + pfsub mm5,mm4 ; mm5=data3=(03 13) + + movq mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm2=[PD_RNDINT_MAGIC] + pcmpeqd mm4,mm4 + psrld mm4,WORD_BIT ; mm4={0xFFFF 0x0000 0xFFFF 0x0000} + + pfadd mm3,mm2 ; mm3=roundint(data4/8)=(04 ** 14 **) + pfadd mm7,mm2 ; mm7=roundint(data5/8)=(05 ** 15 **) + pfadd mm1,mm2 ; mm1=roundint(data2/8)=(02 ** 12 **) + pfadd mm5,mm2 ; mm5=roundint(data3/8)=(03 ** 13 **) + + pand mm3,mm4 ; mm3=(04 -- 14 --) + pslld mm7,WORD_BIT ; mm7=(-- 05 -- 15) + pand mm1,mm4 ; mm1=(02 -- 12 --) + pslld mm5,WORD_BIT ; mm5=(-- 03 -- 13) + por mm3,mm7 ; mm3=(04 05 14 15) + por mm1,mm5 ; mm1=(02 03 12 13) + + movq mm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm2=[PB_CENTERJSAMP] + + packsswb mm6,mm3 ; mm6=(00 01 10 11 04 05 14 15) + packsswb mm1,mm0 ; mm1=(02 03 12 13 06 07 16 17) + paddb mm6,mm2 + paddb mm1,mm2 + + movq mm4,mm6 ; transpose coefficients(phase 2) + punpcklwd mm6,mm1 ; mm6=(00 01 02 03 10 11 12 13) + punpckhwd mm4,mm1 ; mm4=(04 05 06 07 14 15 16 17) + + movq mm7,mm6 ; transpose coefficients(phase 3) + punpckldq mm6,mm4 ; mm6=(00 01 02 03 04 05 06 07) + punpckhdq mm7,mm4 ; mm7=(10 11 12 13 14 15 16 17) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD 
[edx+eax*SIZEOF_JSAMPLE], mm6 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 + + poppic ebx ; restore GOT address + + add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 2*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JIDCT_FLT_3DNOW_MMX_SUPPORTED +%endif ; DCT_FLOAT_SUPPORTED diff --git a/jidctflt.asm b/jidctflt.asm new file mode 100644 index 0000000..126dc7b --- /dev/null +++ b/jidctflt.asm @@ -0,0 +1,473 @@ +; +; jidctflt.asm - floating-point IDCT (non-SIMD) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. +; +; Last Modified : October 17, 2004 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_FLOAT_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." 
+%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +%define ROTATOR_TYPE FP32 ; float + + alignz 16 + global EXTN(jconst_idct_float) + +EXTN(jconst_idct_float): + +F_1_414 dd 1.414213562373095048801689 ; 2*cos(PI*1/4) +F_1_847 dd 1.847759065022573512256366 ; 2*cos(PI*1/8) +F_1_082 dd 1.082392200292393968799446 ; 2*(cos(PI*1/8)-cos(PI*3/8)) +F_2_613 dd 2.613125929752753055713286 ; 2*(cos(PI*1/8)+cos(PI*3/8)) + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define tmp ebp-SIZEOF_FP64 ; double tmp +%define workspace tmp-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] +%define rndint_magic workspace-SIZEOF_FP32 + ; float rndint_magic = 100663296.0F +%define gotptr rndint_magic-SIZEOF_POINTER ; void * gotptr + + align 16 + global EXTN(jpeg_idct_float) + +EXTN(jpeg_idct_float): + push ebp + mov ebp,esp + lea esp, [workspace] ; reserve tmp and workspace[] on the stack + push FP32 0x4CC00000 ; (float)(0x00C00000 << 3); this push initializes rndint_magic + pushpic eax ; make room for the GOT address (gotptr) + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ + mov edx, POINTER [compptr(ebp)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + lea edi, [workspace] ; FAST_FLOAT * wsptr + mov ecx, DCTSIZE ; ctr + alignx 16,7 +.columnloop: + mov ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)] + or ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)] + jnz short .columnDCT ; AC term 1 or 2 is nonzero + + mov bx, JCOEF [COL(3,esi,SIZEOF_JCOEF)] + mov ax, JCOEF [COL(4,esi,SIZEOF_JCOEF)] + or bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)] + or ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)] + or bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)] + or ax,bx + jnz short .columnDCT ; some AC term in 3..7 is nonzero + + ; -- AC terms all zero + + fild JCOEF [COL(0,esi,SIZEOF_JCOEF)] + fmul FLOAT_MULT_TYPE [COL(0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantized DC term + + fst FAST_FLOAT [COL(0,edi,SIZEOF_FAST_FLOAT)] ; copy it to all 8 entries of this workspace column + fst FAST_FLOAT [COL(1,edi,SIZEOF_FAST_FLOAT)] + fst FAST_FLOAT [COL(2,edi,SIZEOF_FAST_FLOAT)] + fst FAST_FLOAT [COL(3,edi,SIZEOF_FAST_FLOAT)] + fst FAST_FLOAT [COL(4,edi,SIZEOF_FAST_FLOAT)] + fst FAST_FLOAT [COL(5,edi,SIZEOF_FAST_FLOAT)] + fst FAST_FLOAT [COL(6,edi,SIZEOF_FAST_FLOAT)] + fstp FAST_FLOAT [COL(7,edi,SIZEOF_FAST_FLOAT)] + jmp near .nextcolumn + alignx 16,7 + +.columnDCT: + movpic ebx, POINTER [gotptr] ; load GOT address (the zero test uses bx as scratch) + + ; -- Even part + + fild JCOEF [COL(2,esi,SIZEOF_JCOEF)] + fild JCOEF [COL(6,esi,SIZEOF_JCOEF)] + fild JCOEF [COL(4,esi,SIZEOF_JCOEF)] + fild JCOEF [COL(0,esi,SIZEOF_JCOEF)] + + fxch st0,st3 + + fmul FLOAT_MULT_TYPE [COL(2,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st2 + fmul FLOAT_MULT_TYPE [COL(6,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st1 + fmul FLOAT_MULT_TYPE [COL(4,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st3 + fmul FLOAT_MULT_TYPE [COL(0,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st1 + + fld st2 ; st2 = st2 + st0, st0 = st2 - st0 + fsub st0,st1 + fxch st0,st1 + faddp st3,st0 + + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_414)] + + fld st3 ; st1 = st1 + st3, st3 = st1 - st3 + fsubr st0,st2 + fxch st0,st4 + faddp st2,st0 + + fsub st0,st2 + + fld st1 ; st2 = st1 + st2, st1 = st1 - st2 + fsub 
st0,st3 + fxch st0,st2 + faddp st3,st0 + fld st3 ; st0 = st3 + st0, st3 = st3 - st0 + fsub st0,st1 + fxch st0,st4 + faddp st1,st0 + + ; -- Odd part + + fild JCOEF [COL(1,esi,SIZEOF_JCOEF)] + fild JCOEF [COL(7,esi,SIZEOF_JCOEF)] + fild JCOEF [COL(3,esi,SIZEOF_JCOEF)] + fild JCOEF [COL(5,esi,SIZEOF_JCOEF)] + + fxch st0,st3 + + fmul FLOAT_MULT_TYPE [COL(1,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st2 + fmul FLOAT_MULT_TYPE [COL(7,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st1 + fmul FLOAT_MULT_TYPE [COL(3,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st6 + fxch st3,st0 + fmul FLOAT_MULT_TYPE [COL(5,edx,SIZEOF_FLOAT_MULT_TYPE)] + fxch st0,st5 + fstp FP64 [tmp] ; spill one even-part result to tmp (reloaded in the output stage) + + fld st1 ; st1 = st1 + st0, st0 = st1 - st0 + fsub st0,st1 + fxch st0,st1 + faddp st2,st0 + fld st5 ; st4 = st4 + st5, st5 = st4 - st5 + fsubr st0,st5 + fxch st0,st6 + faddp st5,st0 + + fld st1 ; st1 = st1 + st4, st4 = st1 - st4 + fsub st0,st5 + fxch st0,st5 + faddp st2,st0 + + fld st5 + fadd st0,st1 + fxch st0,st5 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_414)] + fxch st0,st5 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_847)] + fxch st0,st6 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_2_613)] + fxch st0,st1 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_082)] + fxch st0,st6 + fsubr st1,st0 + fsubp st6,st0 + + ; -- Final output stage + + fsub st0,st1 + fld st2 ; st1 = st2 + st1, st2 = st2 - st1 + fsub st0,st2 + fxch st0,st3 + faddp st2,st0 + fsub st4,st0 + fld st3 ; st0 = st3 + st0, st3 = st3 - st0 + fsub st0,st1 + fxch st0,st4 + faddp st1,st0 + + fxch st0,st2 + + fstp FAST_FLOAT [COL(7,edi,SIZEOF_FAST_FLOAT)] + fstp FAST_FLOAT [COL(0,edi,SIZEOF_FAST_FLOAT)] + fstp FAST_FLOAT [COL(1,edi,SIZEOF_FAST_FLOAT)] + fstp FAST_FLOAT [COL(6,edi,SIZEOF_FAST_FLOAT)] + + fadd st1,st0 + fld FP64 [tmp] + fld st1 ; st3 = st3 + st1, st1 = st3 - st1 + fsubr st0,st4 + fxch st0,st2 + faddp st4,st0 + fld st0 ; st0 = st0 + st2, st2 = st0 - st2 + fsub st0,st3 + fxch st0,st3 + faddp st1,st0 + + fxch st0,st3 + + fstp FAST_FLOAT [COL(2,edi,SIZEOF_FAST_FLOAT)] + fstp FAST_FLOAT 
[COL(5,edi,SIZEOF_FAST_FLOAT)] + fstp FAST_FLOAT [COL(3,edi,SIZEOF_FAST_FLOAT)] + fstp FAST_FLOAT [COL(4,edi,SIZEOF_FAST_FLOAT)] + +.nextcolumn: + add esi, byte SIZEOF_JCOEF ; advance pointers to next column + add edx, byte SIZEOF_FLOAT_MULT_TYPE + add edi, byte SIZEOF_FAST_FLOAT + dec ecx + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov edx, POINTER [cinfo(ebp)] + mov edx, POINTER [jdstruct_sample_range_limit(edx)] + sub edx, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit (table pointer advanced by CENTERJSAMPLE samples) + + lea esi, [workspace] ; FAST_FLOAT * wsptr + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov ecx, DCTSIZE ; ctr + alignx 16,7 +.rowloop: + push edi ; save the JSAMPROW* cursor (popped at .nextrow) + mov edi, JSAMPROW [edi] ; (JSAMPLE *) + add edi, JDIMENSION [output_col(ebp)] ; edi=outptr + +%ifndef NO_ZERO_ROW_TEST_FLOAT + mov eax, FAST_FLOAT [ROW(1,esi,SIZEOF_FAST_FLOAT)] + add eax,eax ; shl eax,1 (shift out the sign bit) + jnz short .rowDCT + + mov eax, FAST_FLOAT [ROW(2,esi,SIZEOF_FAST_FLOAT)] + mov ebx, FAST_FLOAT [ROW(3,esi,SIZEOF_FAST_FLOAT)] + or eax, FAST_FLOAT [ROW(4,esi,SIZEOF_FAST_FLOAT)] + or ebx, FAST_FLOAT [ROW(5,esi,SIZEOF_FAST_FLOAT)] + or eax, FAST_FLOAT [ROW(6,esi,SIZEOF_FAST_FLOAT)] + or ebx, FAST_FLOAT [ROW(7,esi,SIZEOF_FAST_FLOAT)] + or eax,ebx + add eax,eax ; shl eax,1 (shift out the sign bit) + jnz short .rowDCT + + ; -- AC terms all zero + + push eax ; scratch dword for the FP32 store below + + fld FAST_FLOAT [ROW(0,esi,SIZEOF_FAST_FLOAT)] + fadd FP32 [rndint_magic] + fstp FP32 [esp] + + pop eax + and eax,RANGE_MASK ; masked bits of the stored float become the table index + mov al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE] + mov JSAMPLE [edi+0*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+1*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+2*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+3*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+4*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+5*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+6*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+7*SIZEOF_JSAMPLE], al + jmp near .nextrow + alignx 16,7 +%endif +.rowDCT: + movpic ebx, POINTER [gotptr] ; load GOT address (ebx is used as scratch elsewhere in this loop) 
+ + ; -- Even part + + fld FAST_FLOAT [ROW(4,esi,SIZEOF_FAST_FLOAT)] + fld FAST_FLOAT [ROW(2,esi,SIZEOF_FAST_FLOAT)] + fld FAST_FLOAT [ROW(0,esi,SIZEOF_FAST_FLOAT)] + fld FAST_FLOAT [ROW(6,esi,SIZEOF_FAST_FLOAT)] + + fld st2 ; st2 = st2 + st0, st0 = st2 - st0 + fsub st0,st1 + fxch st0,st1 + faddp st3,st0 + + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_414)] + + fld st3 ; st1 = st1 + st3, st3 = st1 - st3 + fsubr st0,st2 + fxch st0,st4 + faddp st2,st0 + + fsub st0,st2 + + fld st1 ; st2 = st1 + st2, st1 = st1 - st2 + fsub st0,st3 + fxch st0,st2 + faddp st3,st0 + fld st3 ; st0 = st3 + st0, st3 = st3 - st0 + fsub st0,st1 + fxch st0,st4 + faddp st1,st0 + + ; -- Odd part + + fld FAST_FLOAT [ROW(3,esi,SIZEOF_FAST_FLOAT)] + fxch st0,st3 + fld FAST_FLOAT [ROW(1,esi,SIZEOF_FAST_FLOAT)] + fld FAST_FLOAT [ROW(7,esi,SIZEOF_FAST_FLOAT)] + fld FAST_FLOAT [ROW(5,esi,SIZEOF_FAST_FLOAT)] + fxch st0,st5 + fstp FP64 [tmp] + + fld st1 ; st1 = st1 + st0, st0 = st1 - st0 + fsub st0,st1 + fxch st0,st1 + faddp st2,st0 + fld st5 ; st4 = st4 + st5, st5 = st4 - st5 + fsubr st0,st5 + fxch st0,st6 + faddp st5,st0 + + fld st1 ; st1 = st1 + st4, st4 = st1 - st4 + fsub st0,st5 + fxch st0,st5 + faddp st2,st0 + + fld st5 + fadd st0,st1 + fxch st0,st5 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_414)] + fxch st0,st5 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_847)] + fxch st0,st6 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_2_613)] + fxch st0,st1 + fmul ROTATOR_TYPE [GOTOFF(ebx,F_1_082)] + fxch st0,st6 + fsubr st1,st0 + fsubp st6,st0 + + ; -- Final output stage + + sub esp, byte DCTSIZE*SIZEOF_FP32 ; stack buffer for the 8 results of this row (popped below) + + fsub st0,st1 + fld st2 ; st1 = st2 + st1, st2 = st2 - st1 + fsub st0,st2 + fxch st0,st3 + faddp st2,st0 + fsub st4,st0 + fld st3 ; st0 = st3 + st0, st3 = st3 - st0 + fsub st0,st1 + fxch st0,st4 + faddp st1,st0 + + fld FP32 [rndint_magic] ; the magic constant is added to every output before the FP32 store + + fadd st4,st0 + fadd st1,st0 + fadd st2,st0 + fadd st3,st0 + + fxch st0,st4 + + fstp FP32 [esp+6*SIZEOF_FP32] + fstp FP32 [esp+1*SIZEOF_FP32] + fstp FP32 [esp+0*SIZEOF_FP32] + fstp FP32 
[esp+7*SIZEOF_FP32] + + fxch st0,st1 + + fadd st2,st0 + fld FP64 [tmp] + fld st1 ; st4 = st4 + st1, st1 = st4 - st1 + fsubr st0,st5 + fxch st0,st2 + faddp st5,st0 + fld st0 ; st0 = st0 + st3, st3 = st0 - st3 + fsub st0,st4 + fxch st0,st4 + faddp st1,st0 + + fxch st0,st2 + + fadd st1,st0 + fadd st2,st0 + fadd st3,st0 + faddp st4,st0 + + fstp FP32 [esp+5*SIZEOF_FP32] + fstp FP32 [esp+4*SIZEOF_FP32] + fstp FP32 [esp+3*SIZEOF_FP32] + fstp FP32 [esp+2*SIZEOF_FP32] + +%assign i 0 ; i=0; +%rep 4 ; -- repeat 4 times --- + pop eax + pop ebx + and eax,RANGE_MASK + and ebx,RANGE_MASK + mov al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE] + mov bl, JSAMPLE [edx+ebx*SIZEOF_JSAMPLE] + mov JSAMPLE [edi+(i+0)*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+(i+1)*SIZEOF_JSAMPLE], bl +%assign i i+2 ; i+=2; +%endrep ; -- repeat end --- + +.nextrow: + pop edi ; back to the JSAMPROW* cursor saved at .rowloop + add esi, byte DCTSIZE*SIZEOF_FAST_FLOAT + add edi, byte SIZEOF_JSAMPROW ; advance pointer to next row + dec ecx + jnz near .rowloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp + pop ebp + ret + +%endif ; DCT_FLOAT_SUPPORTED diff --git a/jidctfst.asm b/jidctfst.asm new file mode 100644 index 0000000..8022ac6 --- /dev/null +++ b/jidctfst.asm @@ -0,0 +1,464 @@ +; +; jidctfst.asm - fast integer IDCT (non-SIMD) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). 
The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; Last Modified : October 17, 2004 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_IFAST_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +; We can gain a little more speed, with a further compromise in accuracy, +; by omitting the addition in a descaling shift. This yields an +; incorrectly rounded result half the time... +; +%macro descale 2 +%ifdef USE_ACCURATE_ROUNDING +%if (%2)<=7 + add %1, byte (1<<((%2)-1)) ; add reg32,imm8 +%else + add %1, (1<<((%2)-1)) ; add reg32,imm32 +%endif +%endif + sar %1,%2 ; arithmetic right shift (keeps the sign) +%endmacro + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define range_limit ebp-SIZEOF_POINTER ; JSAMPLE * range_limit +%define ptr range_limit-SIZEOF_POINTER ; void * ptr +%define workspace ptr-DCTSIZE2*SIZEOF_INT + ; int workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_ifast) + +EXTN(jpeg_idct_ifast): + push ebp + mov ebp,esp + lea esp, [workspace] ; reserve range_limit, ptr and workspace[] on the stack + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + ; ---- Pass 1: process columns from input, store into work array. + + mov edx, POINTER [compptr(ebp)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + lea edi, [workspace] ; int * wsptr + mov ecx, DCTSIZE ; ctr + alignx 16,7 +.columnloop: + mov ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)] + or ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)] + jnz short .columnDCT ; AC term 1 or 2 is nonzero + + mov bx, JCOEF [COL(3,esi,SIZEOF_JCOEF)] + mov ax, JCOEF [COL(4,esi,SIZEOF_JCOEF)] + or bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)] + or ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)] + or bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)] + or ax,bx + jnz short .columnDCT ; some AC term in 3..7 is nonzero + + ; -- AC terms all zero + + mov ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)] + imul ax, IFAST_MULT_TYPE [COL(0,edx,SIZEOF_IFAST_MULT_TYPE)] + cwde ; sign-extend the 16-bit product into eax + + mov INT [COL(0,edi,SIZEOF_INT)], eax + mov INT [COL(1,edi,SIZEOF_INT)], eax + mov INT [COL(2,edi,SIZEOF_INT)], eax + mov INT [COL(3,edi,SIZEOF_INT)], eax + mov INT [COL(4,edi,SIZEOF_INT)], eax + mov INT [COL(5,edi,SIZEOF_INT)], eax + mov INT [COL(6,edi,SIZEOF_INT)], eax + mov INT [COL(7,edi,SIZEOF_INT)], eax + jmp near .nextcolumn + alignx 16,7 + 
+.columnDCT: + push ecx ; ctr + push esi ; coef_block + push edx ; quantptr + + mov POINTER [ptr], edi ; wsptr (edi is reused as a temporary below) + + ; -- Even part + + movsx eax, JCOEF [COL(0,esi,SIZEOF_JCOEF)] + movsx ecx, JCOEF [COL(4,esi,SIZEOF_JCOEF)] + imul ax, IFAST_MULT_TYPE [COL(0,edx,SIZEOF_IFAST_MULT_TYPE)] ; NOTE(review): 16-bit multiply, upper half of eax is left from movsx - presumably assumes the product fits in 16 bits + imul cx, IFAST_MULT_TYPE [COL(4,edx,SIZEOF_IFAST_MULT_TYPE)] + movsx ebx, JCOEF [COL(2,esi,SIZEOF_JCOEF)] + movsx edi, JCOEF [COL(6,esi,SIZEOF_JCOEF)] + imul bx, IFAST_MULT_TYPE [COL(2,edx,SIZEOF_IFAST_MULT_TYPE)] + imul di, IFAST_MULT_TYPE [COL(6,edx,SIZEOF_IFAST_MULT_TYPE)] + + lea edx,[eax+ecx] ; edx=tmp10 + sub eax,ecx ; eax=tmp11 + + lea ecx,[ebx+edi] ; ecx=tmp13 + sub ebx,edi + imul ebx,(F_1_414) + descale ebx,CONST_BITS + sub ebx,ecx ; ebx=tmp12 + + lea edi,[edx+ecx] ; edi=tmp0 + sub edx,ecx ; edx=tmp3 + lea ecx,[eax+ebx] ; ecx=tmp1 + sub eax,ebx ; eax=tmp2 + + push edx ; tmp3 + push eax ; tmp2 + push ecx ; tmp1 + push edi ; tmp0 + + ; -- Odd part + + mov edx, POINTER [esp+16] ; quantptr (pushed before the four tmp values) + + movsx eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)] + movsx ebx, JCOEF [COL(7,esi,SIZEOF_JCOEF)] + imul ax, IFAST_MULT_TYPE [COL(1,edx,SIZEOF_IFAST_MULT_TYPE)] + imul bx, IFAST_MULT_TYPE [COL(7,edx,SIZEOF_IFAST_MULT_TYPE)] + movsx edi, JCOEF [COL(5,esi,SIZEOF_JCOEF)] + movsx ecx, JCOEF [COL(3,esi,SIZEOF_JCOEF)] + imul di, IFAST_MULT_TYPE [COL(5,edx,SIZEOF_IFAST_MULT_TYPE)] + imul cx, IFAST_MULT_TYPE [COL(3,edx,SIZEOF_IFAST_MULT_TYPE)] + + lea esi,[eax+ebx] ; esi=z11 + sub eax,ebx ; eax=z12 + lea edx,[edi+ecx] ; edx=z13 + sub edi,ecx ; edi=z10 + + lea ebx,[esi+edx] ; ebx=tmp7 + sub esi,edx + imul esi,(F_1_414) ; esi=tmp11 + descale esi,CONST_BITS + + lea ecx,[edi+eax] + imul ecx,(F_1_847) ; ecx=z5 + imul edi,(-F_2_613) ; edi=MULTIPLY(z10,-FIX_2_613125930) + imul eax,(F_1_082) ; eax=MULTIPLY(z12,FIX_1_082392200) + descale ecx,CONST_BITS + descale edi,CONST_BITS + descale eax,CONST_BITS + add edi,ecx ; edi=tmp12 + sub eax,ecx ; eax=tmp10 + + ; -- Final output stage + + sub edi,ebx ; edi=tmp6 + pop edx 
; edx=tmp0 + sub esi,edi ; esi=tmp5 + pop ecx ; ecx=tmp1 + add eax,esi ; eax=tmp4 + push esi ; tmp5 + push eax ; tmp4 + + lea eax,[edx+ebx] ; eax=data0(=tmp0+tmp7) + sub edx,ebx ; edx=data7(=tmp0-tmp7) + lea ebx,[ecx+edi] ; ebx=data1(=tmp1+tmp6) + sub ecx,edi ; ecx=data6(=tmp1-tmp6) + + mov edi, POINTER [ptr] ; edi=wsptr + + mov INT [COL(0,edi,SIZEOF_INT)], eax + mov INT [COL(7,edi,SIZEOF_INT)], edx + mov INT [COL(1,edi,SIZEOF_INT)], ebx + mov INT [COL(6,edi,SIZEOF_INT)], ecx + + pop esi ; esi=tmp4 + pop eax ; eax=tmp5 + pop edx ; edx=tmp2 + pop ecx ; ecx=tmp3 + + lea ebx,[edx+eax] ; ebx=data2(=tmp2+tmp5) + sub edx,eax ; edx=data5(=tmp2-tmp5) + lea eax,[ecx+esi] ; eax=data4(=tmp3+tmp4) + sub ecx,esi ; ecx=data3(=tmp3-tmp4) + + mov INT [COL(2,edi,SIZEOF_INT)], ebx + mov INT [COL(5,edi,SIZEOF_INT)], edx + mov INT [COL(4,edi,SIZEOF_INT)], eax + mov INT [COL(3,edi,SIZEOF_INT)], ecx + + pop edx ; quantptr + pop esi ; coef_block + pop ecx ; ctr + +.nextcolumn: + add esi, byte SIZEOF_JCOEF ; advance pointers to next column + add edx, byte SIZEOF_IFAST_MULT_TYPE + add edi, byte SIZEOF_INT + dec ecx + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov eax, POINTER [cinfo(ebp)] + mov eax, POINTER [jdstruct_sample_range_limit(eax)] + sub eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit (table pointer advanced by CENTERJSAMPLE samples) + mov POINTER [range_limit], eax + + lea esi, [workspace] ; int * wsptr + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov ecx, DCTSIZE ; ctr + alignx 16,7 +.rowloop: + push edi ; save the JSAMPROW* cursor (popped at .nextrow) + mov edi, JSAMPROW [edi] ; (JSAMPLE *) + add edi, JDIMENSION [output_col(ebp)] ; edi=outptr + +%ifndef NO_ZERO_ROW_TEST + mov eax, INT [ROW(1,esi,SIZEOF_INT)] + or eax, INT [ROW(2,esi,SIZEOF_INT)] + jnz short .rowDCT + + mov ebx, INT [ROW(3,esi,SIZEOF_INT)] + mov eax, INT [ROW(4,esi,SIZEOF_INT)] + or ebx, INT [ROW(5,esi,SIZEOF_INT)] + or eax, INT [ROW(6,esi,SIZEOF_INT)] + or ebx, INT [ROW(7,esi,SIZEOF_INT)] + or eax,ebx + jnz short .rowDCT + + ; -- AC terms all zero + + mov eax, INT [ROW(0,esi,SIZEOF_INT)] + + mov edx, POINTER [range_limit] ; (JSAMPLE *) + + descale eax,(PASS1_BITS+3) + and eax,RANGE_MASK + mov al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE] + mov JSAMPLE [edi+0*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+1*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+2*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+3*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+4*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+5*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+6*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+7*SIZEOF_JSAMPLE], al + jmp near .nextrow + alignx 16,7 +%endif +.rowDCT: + push esi ; wsptr + push ecx ; ctr + + mov POINTER [ptr], edi ; outptr (edi is reused as a temporary below) + + ; -- Even part + + mov eax, INT [ROW(0,esi,SIZEOF_INT)] + mov ebx, INT [ROW(2,esi,SIZEOF_INT)] + mov ecx, INT [ROW(4,esi,SIZEOF_INT)] + mov edi, INT [ROW(6,esi,SIZEOF_INT)] + + lea edx,[eax+ecx] ; edx=tmp10 + sub eax,ecx ; eax=tmp11 + + lea ecx,[ebx+edi] ; ecx=tmp13 + sub ebx,edi + imul ebx,(F_1_414) + descale ebx,CONST_BITS + sub ebx,ecx ; ebx=tmp12 + + lea edi,[edx+ecx] ; edi=tmp0 + sub edx,ecx ; edx=tmp3 + lea ecx,[eax+ebx] ; ecx=tmp1 + sub eax,ebx ; eax=tmp2 + + push edx ; tmp3 + push eax ; tmp2 + push ecx ; tmp1 + push edi ; tmp0 
+ + ; -- Odd part + + mov eax, INT [ROW(1,esi,SIZEOF_INT)] + mov ecx, INT [ROW(3,esi,SIZEOF_INT)] + mov edi, INT [ROW(5,esi,SIZEOF_INT)] + mov ebx, INT [ROW(7,esi,SIZEOF_INT)] + + lea esi,[eax+ebx] ; esi=z11 + sub eax,ebx ; eax=z12 + lea edx,[edi+ecx] ; edx=z13 + sub edi,ecx ; edi=z10 + + lea ebx,[esi+edx] ; ebx=tmp7 + sub esi,edx + imul esi,(F_1_414) ; esi=tmp11 + descale esi,CONST_BITS + + lea ecx,[edi+eax] + imul ecx,(F_1_847) ; ecx=z5 + imul edi,(-F_2_613) ; edi=MULTIPLY(z10,-FIX_2_613125930) + imul eax,(F_1_082) ; eax=MULTIPLY(z12,FIX_1_082392200) + descale ecx,CONST_BITS + descale edi,CONST_BITS + descale eax,CONST_BITS + add edi,ecx ; edi=tmp12 + sub eax,ecx ; eax=tmp10 + + ; -- Final output stage + + sub edi,ebx ; edi=tmp6 + pop edx ; edx=tmp0 + sub esi,edi ; esi=tmp5 + pop ecx ; ecx=tmp1 + add eax,esi ; eax=tmp4 + push esi ; tmp5 + push eax ; tmp4 + + lea eax,[edx+ebx] ; eax=data0(=tmp0+tmp7) + sub edx,ebx ; edx=data7(=tmp0-tmp7) + lea ebx,[ecx+edi] ; ebx=data1(=tmp1+tmp6) + sub ecx,edi ; ecx=data6(=tmp1-tmp6) + + mov esi, POINTER [range_limit] ; (JSAMPLE *) + + descale eax,(PASS1_BITS+3) + descale edx,(PASS1_BITS+3) + descale ebx,(PASS1_BITS+3) + descale ecx,(PASS1_BITS+3) + + mov edi, POINTER [ptr] ; edi=outptr + + and eax,RANGE_MASK + and edx,RANGE_MASK + and ebx,RANGE_MASK + and ecx,RANGE_MASK + + mov al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE] + mov dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE] + mov bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE] + mov cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE] + + mov JSAMPLE [edi+0*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+7*SIZEOF_JSAMPLE], dl + mov JSAMPLE [edi+1*SIZEOF_JSAMPLE], bl + mov JSAMPLE [edi+6*SIZEOF_JSAMPLE], cl + + pop esi ; esi=tmp4 + pop eax ; eax=tmp5 + pop edx ; edx=tmp2 + pop ecx ; ecx=tmp3 + + lea ebx,[edx+eax] ; ebx=data2(=tmp2+tmp5) + sub edx,eax ; edx=data5(=tmp2-tmp5) + lea eax,[ecx+esi] ; eax=data4(=tmp3+tmp4) + sub ecx,esi ; ecx=data3(=tmp3-tmp4) + + mov esi, POINTER [range_limit] ; (JSAMPLE *) + + descale ebx,(PASS1_BITS+3) 
+ descale edx,(PASS1_BITS+3) + descale eax,(PASS1_BITS+3) + descale ecx,(PASS1_BITS+3) + + and ebx,RANGE_MASK + and edx,RANGE_MASK + and eax,RANGE_MASK + and ecx,RANGE_MASK + + mov bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE] + mov dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE] + mov al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE] + mov cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE] + + mov JSAMPLE [edi+2*SIZEOF_JSAMPLE], bl + mov JSAMPLE [edi+5*SIZEOF_JSAMPLE], dl + mov JSAMPLE [edi+4*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+3*SIZEOF_JSAMPLE], cl + + pop ecx ; ctr + pop esi ; wsptr + +.nextrow: + pop edi ; back to the JSAMPROW* cursor saved at .rowloop + add esi, byte DCTSIZE*SIZEOF_INT ; advance pointer to next row + add edi, byte SIZEOF_JSAMPROW + dec ecx + jnz near .rowloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp + pop ebp + ret + +%endif ; DCT_IFAST_SUPPORTED diff --git a/jidctint.asm b/jidctint.asm new file mode 100644 index 0000000..eb81919 --- /dev/null +++ b/jidctint.asm @@ -0,0 +1,524 @@ +; +; jidctint.asm - accurate integer IDCT (non-SIMD) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; Last Modified : October 17, 2004 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_ISLOW_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. 
+; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +; Descale and correctly round a DWORD value that's scaled by N bits. +; +%macro descale 2 +%if (%2)<=7 + add %1, byte (1<<((%2)-1)) ; add reg32,imm8 +%else + add %1, (1<<((%2)-1)) ; add reg32,imm32 +%endif + sar %1,%2 +%endmacro + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define range_limit ebp-SIZEOF_POINTER ; JSAMPLE * range_limit +%define ptr range_limit-SIZEOF_POINTER ; void * ptr +%define workspace ptr-DCTSIZE2*SIZEOF_INT + ; int workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_islow) + +EXTN(jpeg_idct_islow): + push ebp + mov ebp,esp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + ; ---- Pass 1: process columns from input, store into work array. 
+ + mov edx, POINTER [compptr(ebp)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + lea edi, [workspace] ; int * wsptr + mov ecx, DCTSIZE ; ctr + alignx 16,7 +.columnloop: + mov ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)] + or ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + mov bx, JCOEF [COL(3,esi,SIZEOF_JCOEF)] + mov ax, JCOEF [COL(4,esi,SIZEOF_JCOEF)] + or bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)] + or ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)] + or bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)] + or ax,bx + jnz short .columnDCT + + ; -- AC terms all zero + + mov ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)] + imul ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)] + cwde + + sal eax,PASS1_BITS + + mov INT [COL(0,edi,SIZEOF_INT)], eax + mov INT [COL(1,edi,SIZEOF_INT)], eax + mov INT [COL(2,edi,SIZEOF_INT)], eax + mov INT [COL(3,edi,SIZEOF_INT)], eax + mov INT [COL(4,edi,SIZEOF_INT)], eax + mov INT [COL(5,edi,SIZEOF_INT)], eax + mov INT [COL(6,edi,SIZEOF_INT)], eax + mov INT [COL(7,edi,SIZEOF_INT)], eax + jmp near .nextcolumn + alignx 16,7 + +.columnDCT: + push ecx ; ctr + push esi ; coef_block + push edx ; quantptr + + mov POINTER [ptr], edi ; wsptr + + ; -- Even part + + movsx eax, JCOEF [COL(0,esi,SIZEOF_JCOEF)] + movsx ecx, JCOEF [COL(4,esi,SIZEOF_JCOEF)] + imul ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)] + imul cx, ISLOW_MULT_TYPE [COL(4,edx,SIZEOF_ISLOW_MULT_TYPE)] + movsx ebx, JCOEF [COL(2,esi,SIZEOF_JCOEF)] + movsx edi, JCOEF [COL(6,esi,SIZEOF_JCOEF)] + imul bx, ISLOW_MULT_TYPE [COL(2,edx,SIZEOF_ISLOW_MULT_TYPE)] + imul di, ISLOW_MULT_TYPE [COL(6,edx,SIZEOF_ISLOW_MULT_TYPE)] + + lea edx,[eax+ecx] + sub eax,ecx + sal edx,CONST_BITS ; edx=tmp0 + sal eax,CONST_BITS ; eax=tmp1 + + lea ecx,[ebx+edi] + imul ecx,(F_0_541) ; ecx=z1 + imul ebx,(F_0_765) ; ebx=MULTIPLY(z2,FIX_0_765366865) + imul edi,(-F_1_847) ; edi=MULTIPLY(z3,-FIX_1_847759065) + add ebx,ecx ; ebx=tmp3 + add edi,ecx ; edi=tmp2 + + lea ecx,[edx+ebx] ; 
ecx=tmp10 + sub edx,ebx ; edx=tmp13 + lea ebx,[eax+edi] ; ebx=tmp11 + sub eax,edi ; eax=tmp12 + + push edx ; tmp13 + push eax ; tmp12 + push ebx ; tmp11 + push ecx ; tmp10 + + ; -- Odd part + + mov edx, POINTER [esp+16] ; quantptr + + movsx eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)] + movsx edi, JCOEF [COL(3,esi,SIZEOF_JCOEF)] + imul ax, ISLOW_MULT_TYPE [COL(1,edx,SIZEOF_ISLOW_MULT_TYPE)] + imul di, ISLOW_MULT_TYPE [COL(3,edx,SIZEOF_ISLOW_MULT_TYPE)] + movsx ecx, JCOEF [COL(5,esi,SIZEOF_JCOEF)] + movsx ebx, JCOEF [COL(7,esi,SIZEOF_JCOEF)] + imul cx, ISLOW_MULT_TYPE [COL(5,edx,SIZEOF_ISLOW_MULT_TYPE)] + imul bx, ISLOW_MULT_TYPE [COL(7,edx,SIZEOF_ISLOW_MULT_TYPE)] + + push eax ; eax=tmp3 + push edi ; edi=tmp2 + push ecx ; ecx=tmp1 + push ebx ; ebx=tmp0 + + lea esi,[ebx+edi] ; esi=z3 + lea edx,[ecx+eax] ; edx=z4 + add ebx,eax ; ebx=z1 + add ecx,edi ; ecx=z2 + + lea eax,[esi+edx] + imul eax,(F_1_175) ; eax=z5 + + imul esi,(-F_1_961) ; esi=z3(=MULTIPLY(z3,-FIX_1_961570560)) + imul edx,(-F_0_390) ; edx=z4(=MULTIPLY(z4,-FIX_0_390180644)) + imul ebx,(-F_0_899) ; ebx=z1(=MULTIPLY(z1,-FIX_0_899976223)) + imul ecx,(-F_2_562) ; ecx=z2(=MULTIPLY(z2,-FIX_2_562915447)) + + add esi,eax ; esi=z3(=z3+z5) + add edx,eax ; edx=z4(=z4+z5) + + lea edi,[esi+ebx] ; edi=z1+z3 + lea eax,[edx+ecx] ; eax=z2+z4 + add esi,ecx ; esi=z2+z3 + add edx,ebx ; edx=z1+z4 + + pop ecx ; ecx=tmp0 + pop ebx ; ebx=tmp1 + imul ecx,(F_0_298) ; ecx=tmp0(=MULTIPLY(tmp0,FIX_0_298631336)) + imul ebx,(F_2_053) ; ebx=tmp1(=MULTIPLY(tmp1,FIX_2_053119869)) + add edi,ecx ; edi=tmp0(=tmp0+z1+z3) + add eax,ebx ; eax=tmp1(=tmp1+z2+z4) + + pop ecx ; ecx=tmp2 + pop ebx ; ebx=tmp3 + imul ecx,(F_3_072) ; ecx=tmp2(=MULTIPLY(tmp2,FIX_3_072711026)) + imul ebx,(F_1_501) ; ebx=tmp3(=MULTIPLY(tmp3,FIX_1_501321110)) + add esi,ecx ; esi=tmp2(=tmp2+z2+z3) + add edx,ebx ; edx=tmp3(=tmp3+z1+z4) + + ; -- Final output stage + + pop ecx ; ecx=tmp10 + pop ebx ; ebx=tmp11 + push eax ; tmp1 + push edi ; tmp0 + + lea eax,[ecx+edx] ; 
eax=data0(=tmp10+tmp3) + sub ecx,edx ; ecx=data7(=tmp10-tmp3) + lea edx,[ebx+esi] ; edx=data1(=tmp11+tmp2) + sub ebx,esi ; ebx=data6(=tmp11-tmp2) + + mov edi, POINTER [ptr] ; edi=wsptr + + descale eax,(CONST_BITS-PASS1_BITS) + descale ecx,(CONST_BITS-PASS1_BITS) + descale edx,(CONST_BITS-PASS1_BITS) + descale ebx,(CONST_BITS-PASS1_BITS) + + mov INT [COL(0,edi,SIZEOF_INT)], eax + mov INT [COL(7,edi,SIZEOF_INT)], ecx + mov INT [COL(1,edi,SIZEOF_INT)], edx + mov INT [COL(6,edi,SIZEOF_INT)], ebx + + pop esi ; esi=tmp0 + pop eax ; eax=tmp1 + pop ecx ; ecx=tmp12 + pop edx ; edx=tmp13 + + lea ebx,[ecx+eax] ; ebx=data2(=tmp12+tmp1) + sub ecx,eax ; ecx=data5(=tmp12-tmp1) + lea eax,[edx+esi] ; eax=data3(=tmp13+tmp0) + sub edx,esi ; edx=data4(=tmp13-tmp0) + + descale ebx,(CONST_BITS-PASS1_BITS) + descale ecx,(CONST_BITS-PASS1_BITS) + descale eax,(CONST_BITS-PASS1_BITS) + descale edx,(CONST_BITS-PASS1_BITS) + + mov INT [COL(2,edi,SIZEOF_INT)], ebx + mov INT [COL(5,edi,SIZEOF_INT)], ecx + mov INT [COL(3,edi,SIZEOF_INT)], eax + mov INT [COL(4,edi,SIZEOF_INT)], edx + + pop edx ; quantptr + pop esi ; coef_block + pop ecx ; ctr + +.nextcolumn: + add esi, byte SIZEOF_JCOEF ; advance pointers to next column + add edx, byte SIZEOF_ISLOW_MULT_TYPE + add edi, byte SIZEOF_INT + dec ecx + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov eax, POINTER [cinfo(ebp)] + mov eax, POINTER [jdstruct_sample_range_limit(eax)] + sub eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE ; JSAMPLE * range_limit + mov POINTER [range_limit], eax + + lea esi, [workspace] ; int * wsptr + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov ecx, DCTSIZE ; ctr + alignx 16,7 +.rowloop: + push edi + mov edi, JSAMPROW [edi] ; (JSAMPLE *) + add edi, JDIMENSION [output_col(ebp)] ; edi=outptr + +%ifndef NO_ZERO_ROW_TEST + mov eax, INT [ROW(1,esi,SIZEOF_INT)] + or eax, INT [ROW(2,esi,SIZEOF_INT)] + jnz short .rowDCT + + mov ebx, INT [ROW(3,esi,SIZEOF_INT)] + mov eax, INT [ROW(4,esi,SIZEOF_INT)] + or ebx, INT [ROW(5,esi,SIZEOF_INT)] + or eax, INT [ROW(6,esi,SIZEOF_INT)] + or ebx, INT [ROW(7,esi,SIZEOF_INT)] + or eax,ebx + jnz short .rowDCT + + ; -- AC terms all zero + + mov eax, INT [ROW(0,esi,SIZEOF_INT)] + + mov edx, POINTER [range_limit] ; (JSAMPLE *) + + descale eax,(PASS1_BITS+3) + and eax,RANGE_MASK + mov al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE] + mov JSAMPLE [edi+0*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+1*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+2*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+3*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+4*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+5*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+6*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+7*SIZEOF_JSAMPLE], al + jmp near .nextrow + alignx 16,7 +%endif +.rowDCT: + push esi ; wsptr + push ecx ; ctr + + mov POINTER [ptr], edi ; outptr + + ; -- Even part + + mov eax, INT [ROW(0,esi,SIZEOF_INT)] + mov ebx, INT [ROW(2,esi,SIZEOF_INT)] + mov ecx, INT [ROW(4,esi,SIZEOF_INT)] + mov edi, INT [ROW(6,esi,SIZEOF_INT)] + + lea edx,[eax+ecx] + sub eax,ecx + sal edx,CONST_BITS ; edx=tmp0 + sal eax,CONST_BITS ; eax=tmp1 + + lea ecx,[ebx+edi] + imul ecx,(F_0_541) ; ecx=z1 + imul ebx,(F_0_765) ; ebx=MULTIPLY(z2,FIX_0_765366865) + imul edi,(-F_1_847) ; edi=MULTIPLY(z3,-FIX_1_847759065) + add ebx,ecx ; ebx=tmp3 + add edi,ecx ; edi=tmp2 + + lea ecx,[edx+ebx] ; ecx=tmp10 + sub edx,ebx ; 
edx=tmp13 + lea ebx,[eax+edi] ; ebx=tmp11 + sub eax,edi ; eax=tmp12 + + push edx ; tmp13 + push eax ; tmp12 + push ebx ; tmp11 + push ecx ; tmp10 + + ; -- Odd part + + mov eax, INT [ROW(1,esi,SIZEOF_INT)] + mov edi, INT [ROW(3,esi,SIZEOF_INT)] + mov ecx, INT [ROW(5,esi,SIZEOF_INT)] + mov ebx, INT [ROW(7,esi,SIZEOF_INT)] + + push eax ; eax=tmp3 + push edi ; edi=tmp2 + push ecx ; ecx=tmp1 + push ebx ; ebx=tmp0 + + lea esi,[ebx+edi] ; esi=z3 + lea edx,[ecx+eax] ; edx=z4 + add ebx,eax ; ebx=z1 + add ecx,edi ; ecx=z2 + + lea eax,[esi+edx] + imul eax,(F_1_175) ; eax=z5 + + imul esi,(-F_1_961) ; esi=z3(=MULTIPLY(z3,-FIX_1_961570560)) + imul edx,(-F_0_390) ; edx=z4(=MULTIPLY(z4,-FIX_0_390180644)) + imul ebx,(-F_0_899) ; ebx=z1(=MULTIPLY(z1,-FIX_0_899976223)) + imul ecx,(-F_2_562) ; ecx=z2(=MULTIPLY(z2,-FIX_2_562915447)) + + add esi,eax ; esi=z3(=z3+z5) + add edx,eax ; edx=z4(=z4+z5) + + lea edi,[esi+ebx] ; edi=z1+z3 + lea eax,[edx+ecx] ; eax=z2+z4 + add esi,ecx ; esi=z2+z3 + add edx,ebx ; edx=z1+z4 + + pop ecx ; ecx=tmp0 + pop ebx ; ebx=tmp1 + imul ecx,(F_0_298) ; ecx=tmp0(=MULTIPLY(tmp0,FIX_0_298631336)) + imul ebx,(F_2_053) ; ebx=tmp1(=MULTIPLY(tmp1,FIX_2_053119869)) + add edi,ecx ; edi=tmp0(=tmp0+z1+z3) + add eax,ebx ; eax=tmp1(=tmp1+z2+z4) + + pop ecx ; ecx=tmp2 + pop ebx ; ebx=tmp3 + imul ecx,(F_3_072) ; ecx=tmp2(=MULTIPLY(tmp2,FIX_3_072711026)) + imul ebx,(F_1_501) ; ebx=tmp3(=MULTIPLY(tmp3,FIX_1_501321110)) + add esi,ecx ; esi=tmp2(=tmp2+z2+z3) + add edx,ebx ; edx=tmp3(=tmp3+z1+z4) + + ; -- Final output stage + + pop ecx ; ecx=tmp10 + pop ebx ; ebx=tmp11 + push eax ; tmp1 + push edi ; tmp0 + + lea eax,[ecx+edx] ; eax=data0(=tmp10+tmp3) + sub ecx,edx ; ecx=data7(=tmp10-tmp3) + lea edx,[ebx+esi] ; edx=data1(=tmp11+tmp2) + sub ebx,esi ; ebx=data6(=tmp11-tmp2) + + mov esi, POINTER [range_limit] ; (JSAMPLE *) + + descale eax,(CONST_BITS+PASS1_BITS+3) + descale ecx,(CONST_BITS+PASS1_BITS+3) + descale edx,(CONST_BITS+PASS1_BITS+3) + descale ebx,(CONST_BITS+PASS1_BITS+3) + 
+ mov edi, POINTER [ptr] ; edi=outptr + + and eax,RANGE_MASK + and ecx,RANGE_MASK + and edx,RANGE_MASK + and ebx,RANGE_MASK + + mov al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE] + mov cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE] + mov dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE] + mov bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE] + + mov JSAMPLE [edi+0*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+7*SIZEOF_JSAMPLE], cl + mov JSAMPLE [edi+1*SIZEOF_JSAMPLE], dl + mov JSAMPLE [edi+6*SIZEOF_JSAMPLE], bl + + pop esi ; esi=tmp0 + pop eax ; eax=tmp1 + pop ecx ; ecx=tmp12 + pop edx ; edx=tmp13 + + lea ebx,[ecx+eax] ; ebx=data2(=tmp12+tmp1) + sub ecx,eax ; ecx=data5(=tmp12-tmp1) + lea eax,[edx+esi] ; eax=data3(=tmp13+tmp0) + sub edx,esi ; edx=data4(=tmp13-tmp0) + + mov esi, POINTER [range_limit] ; (JSAMPLE *) + + descale ebx,(CONST_BITS+PASS1_BITS+3) + descale ecx,(CONST_BITS+PASS1_BITS+3) + descale eax,(CONST_BITS+PASS1_BITS+3) + descale edx,(CONST_BITS+PASS1_BITS+3) + + and ebx,RANGE_MASK + and ecx,RANGE_MASK + and eax,RANGE_MASK + and edx,RANGE_MASK + + mov bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE] + mov cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE] + mov al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE] + mov dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE] + + mov JSAMPLE [edi+2*SIZEOF_JSAMPLE], bl + mov JSAMPLE [edi+5*SIZEOF_JSAMPLE], cl + mov JSAMPLE [edi+3*SIZEOF_JSAMPLE], al + mov JSAMPLE [edi+4*SIZEOF_JSAMPLE], dl + + pop ecx ; ctr + pop esi ; wsptr + +.nextrow: + pop edi + add esi, byte DCTSIZE*SIZEOF_INT ; advance pointer to next row + add edi, byte SIZEOF_JSAMPROW + dec ecx + jnz near .rowloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp + pop ebp + ret + +%endif ; DCT_ISLOW_SUPPORTED diff --git a/jidctred.asm b/jidctred.asm new file mode 100644 index 0000000..4463bfb --- /dev/null +++ b/jidctred.asm @@ -0,0 +1,688 @@ +; +; jidctred.asm - reduced-size IDCT (non-SIMD) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size output:
+; either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; Last Modified : October 17, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef IDCT_SCALING_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+; Descale and correctly round a DWORD value that's scaled by N bits.
+; %1 = 32-bit register, updated in place; %2 = N.  Result: (%1 + 2^(N-1)) >> N.
+%macro descale 2
+%if (%2)<=7
+	add	%1, byte (1<<((%2)-1))		; add reg32,imm8 (rounding term is at most 64 here, so it fits a signed byte)
+%else
+	add	%1, (1<<((%2)-1))		; add reg32,imm32
+%endif
+	sar	%1,%2				; arithmetic (sign-preserving) right shift by N
+%endmacro
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS	13	; fraction bits of the F_x_xxx constants
+%define PASS1_BITS	2	; extra fraction bits kept in the pass-1 work array
+; Precomputed table for the default CONST_BITS: F_x_xxx = x * 2^13, rounded.
+%if CONST_BITS == 13
+F_0_211	equ	 1730		; FIX(0.211164243)
+F_0_509	equ	 4176		; FIX(0.509795579)
+F_0_601	equ	 4926		; FIX(0.601344887)
+F_0_720	equ	 5906		; FIX(0.720959822)
+F_0_765	equ	 6270		; FIX(0.765366865)
+F_0_850	equ	 6967		; FIX(0.850430095)
+F_0_899	equ	 7373		; FIX(0.899976223)
+F_1_061	equ	 8697		; FIX(1.061594337)
+F_1_272	equ	10426		; FIX(1.272758580)
+F_1_451	equ	11893		; FIX(1.451774981)
+F_1_847	equ	15137		; FIX(1.847759065)
+F_2_172	equ	17799		; FIX(2.172734803)
+F_2_562	equ	20995		; FIX(2.562915447)
+F_3_624	equ	29692		; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value is written as an integer scaled by 2^30 and rounded down to CONST_BITS fraction bits.
+%define	DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))	; x / 2^n, rounded to nearest
+F_0_211	equ	DESCALE( 226735879,30-CONST_BITS)	; FIX(0.211164243)
+F_0_509	equ	DESCALE( 547388834,30-CONST_BITS)	; FIX(0.509795579)
+F_0_601	equ	DESCALE( 645689155,30-CONST_BITS)	; FIX(0.601344887)
+F_0_720	equ	DESCALE( 774124714,30-CONST_BITS)	; FIX(0.720959822)
+F_0_765	equ	DESCALE( 821806413,30-CONST_BITS)	; FIX(0.765366865)
+F_0_850	equ	DESCALE( 913142361,30-CONST_BITS)	; FIX(0.850430095)
+F_0_899	equ	DESCALE( 966342111,30-CONST_BITS)	; FIX(0.899976223)
+F_1_061	equ	DESCALE(1139878239,30-CONST_BITS)	; FIX(1.061594337)
+F_1_272	equ	DESCALE(1366614119,30-CONST_BITS)	; FIX(1.272758580)
+F_1_451	equ	DESCALE(1558831516,30-CONST_BITS)	; FIX(1.451774981)
+F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
+F_2_172	equ	DESCALE(2332956230,30-CONST_BITS)	; FIX(2.172734803)
+F_2_562	equ	DESCALE(2751909506,30-CONST_BITS)	; FIX(2.562915447)
+F_3_624	equ	DESCALE(3891787747,30-CONST_BITS)	; FIX(3.624509785)
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 4x4 output block.
+; C prototype (the five arguments are read from the stack relative to ebp):
+; GLOBAL(void)
+; jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                JCOEFPTR coef_block,
+;                JSAMPARRAY output_buf, JDIMENSION output_col)
+; ebp, ebx, esi and edi are saved and restored; eax, ecx and edx are clobbered.
+
+%define cinfo(b)	(b)+8		; j_decompress_ptr cinfo
+%define compptr(b)	(b)+12		; jpeg_component_info * compptr
+%define coef_block(b)	(b)+16		; JCOEFPTR coef_block
+%define output_buf(b)	(b)+20		; JSAMPARRAY output_buf
+%define output_col(b)	(b)+24		; JDIMENSION output_col
+; Locals below the frame pointer: the range_limit pointer, then the work array.
+%define range_limit	ebp-SIZEOF_POINTER	; JSAMPLE * range_limit
+%define workspace	range_limit-(DCTSIZE*4)*SIZEOF_INT
+					; int workspace[DCTSIZE*4]  (pass 2 reads it as 4 rows of DCTSIZE ints)
+
+	align	16
+	global	EXTN(jpeg_idct_4x4)
+
+EXTN(jpeg_idct_4x4):
+	push	ebp
+	mov	ebp,esp
+	lea	esp, [workspace]	; reserve the locals; esp = lowest address of workspace
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	; ---- Pass 1: process columns from input, store into work array.
+
+	mov	edx, POINTER [compptr(ebp)]
+	mov	edx, POINTER [jcompinfo_dct_table(edx)]	; quantptr
+	mov	esi, JCOEFPTR [coef_block(ebp)]		; inptr
+	lea	edi, [workspace]			; int * wsptr
+	mov	ecx, DCTSIZE				; ctr (counts down; column index = DCTSIZE - ctr)
+	alignx	16,7
+.columnloop:
+	; Don't bother to process column 4, because second pass won't use it
+	cmp	ecx, byte DCTSIZE-4	; ctr == 4 is column 4
+	je	near .nextcolumn
+
+	mov	ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+	or	ax, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+	jnz	short .columnDCT	; term 1 or 2 is nonzero
+
+	mov	ax, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+	mov	bx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+	or	ax, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+	or	bx, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+	or	ax,bx			; zero only if terms 3,5,6,7 are all zero
+	jnz	short .columnDCT
+
+	; -- AC terms all zero; we need not examine term 4 for 4x4 output
+
+	mov	ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+	imul	ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]	; dequantize the DC term (16-bit multiply)
+	cwde				; sign-extend ax into eax
+
+	sal	eax, PASS1_BITS		; same scaling as the .columnDCT outputs
+
+	mov	INT [COL(0,edi,SIZEOF_INT)], eax	; all four outputs of this column equal the scaled DC term
+	mov	INT [COL(1,edi,SIZEOF_INT)], eax
+	mov	INT [COL(2,edi,SIZEOF_INT)], eax
+	mov	INT [COL(3,edi,SIZEOF_INT)], eax
+	jmp	near .nextcolumn
+	alignx	16,7
+
+.columnDCT:
+	push	ecx			; ctr
+	push	esi			; coef_block (esi is reused as scratch in the odd part)
+	push	edx			; quantptr (edx is reused as scratch in the odd part)
+	push	edi			; wsptr
+
+	; -- Even part
+
+	movsx	ebx, JCOEF [COL(2,esi,SIZEOF_JCOEF)]
+	movsx	ecx, JCOEF [COL(6,esi,SIZEOF_JCOEF)]
+	movsx	eax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+	imul	bx, ISLOW_MULT_TYPE [COL(2,edx,SIZEOF_ISLOW_MULT_TYPE)]	; dequantize: 16-bit multiply, only the low word of ebx is updated
+	imul	cx, ISLOW_MULT_TYPE [COL(6,edx,SIZEOF_ISLOW_MULT_TYPE)]
+	imul	ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+	imul	ebx,(F_1_847)	; ebx=MULTIPLY(z2,FIX_1_847759065)
+	imul	ecx,(-F_0_765)	; ecx=MULTIPLY(z3,-FIX_0_765366865)
+	sal	eax,(CONST_BITS+1)	; eax=tmp0
+	add	ecx,ebx		; ecx=tmp2
+
+	lea	edi,[eax+ecx]	; edi=tmp10
+	sub	eax,ecx		; eax=tmp12
+
+	push	eax		; tmp12
+	push	edi		; tmp10
+
+	; -- Odd part
+
+	movsx	edi, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+	movsx	ecx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+	imul	di, ISLOW_MULT_TYPE [COL(7,edx,SIZEOF_ISLOW_MULT_TYPE)]
+	imul	cx, ISLOW_MULT_TYPE [COL(5,edx,SIZEOF_ISLOW_MULT_TYPE)]
+	movsx	ebx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+	movsx	eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+	imul	bx, ISLOW_MULT_TYPE [COL(3,edx,SIZEOF_ISLOW_MULT_TYPE)]
+	imul	ax, ISLOW_MULT_TYPE [COL(1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+	mov	esi,edi		; esi=edi=z1
+	mov	edx,ecx		; edx=ecx=z2
+	imul	edi,(-F_0_211)	; edi=MULTIPLY(z1,-FIX_0_211164243)
+	imul	ecx,(F_1_451)	; ecx=MULTIPLY(z2,FIX_1_451774981)
+	imul	esi,(-F_0_509)	; esi=MULTIPLY(z1,-FIX_0_509795579)
+	imul	edx,(-F_0_601)	; edx=MULTIPLY(z2,-FIX_0_601344887)
+
+	add	edi,ecx		; edi=(tmp0)
+	add	esi,edx		; esi=(tmp2)
+
+	mov	ecx,ebx		; ecx=ebx=z3
+	mov	edx,eax		; edx=eax=z4
+	imul	ebx,(-F_2_172)	; ebx=MULTIPLY(z3,-FIX_2_172734803)
+	imul	eax,(F_1_061)	; eax=MULTIPLY(z4,FIX_1_061594337)
+	imul	ecx,(F_0_899)	; ecx=MULTIPLY(z3,FIX_0_899976223)
+	imul	edx,(F_2_562)	; edx=MULTIPLY(z4,FIX_2_562915447)
+
+	add	edi,ebx
+	add	esi,ecx
+	add	edi,eax		; edi=tmp0
+	add	esi,edx		; esi=tmp2
+
+	; -- Final output stage
+
+	pop	ebx		; ebx=tmp10
+	pop	ecx		; ecx=tmp12
+
+	lea	eax,[ebx+esi]	; eax=data0(=tmp10+tmp2)
+	sub	ebx,esi		; ebx=data3(=tmp10-tmp2)
+	lea	edx,[ecx+edi]	; edx=data1(=tmp12+tmp0)
+	sub	ecx,edi		; ecx=data2(=tmp12-tmp0)
+
+	pop	edi		; wsptr
+
+	descale	eax,(CONST_BITS-PASS1_BITS+1)	; results keep PASS1_BITS fraction bits for pass 2
+	descale	ebx,(CONST_BITS-PASS1_BITS+1)
+	descale	edx,(CONST_BITS-PASS1_BITS+1)
+	descale	ecx,(CONST_BITS-PASS1_BITS+1)
+
+	mov	INT [COL(0,edi,SIZEOF_INT)], eax
+	mov	INT [COL(3,edi,SIZEOF_INT)], ebx
+	mov	INT [COL(1,edi,SIZEOF_INT)], edx
+	mov	INT [COL(2,edi,SIZEOF_INT)], ecx
+
+	pop	edx		; quantptr
+	pop	esi		; coef_block
+	pop	ecx		; ctr
+
+.nextcolumn:
+	add	esi, byte SIZEOF_JCOEF	; advance pointers to next column
+	add	edx, byte SIZEOF_ISLOW_MULT_TYPE
+	add	edi, byte SIZEOF_INT
+	dec	ecx
+	jnz	near .columnloop
+
+	; ---- Pass 2: process 4 rows from work array, store into output array.
+
+	mov	eax, POINTER [cinfo(ebp)]
+	mov	eax, POINTER [jdstruct_sample_range_limit(eax)]
+	sub	eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE	; JSAMPLE * range_limit (i.e. table pointer + CENTERJSAMPLE samples)
+	mov	POINTER [range_limit], eax
+
+	lea	esi, [workspace]			; int * wsptr
+	mov	edi, JSAMPARRAY [output_buf(ebp)]	; (JSAMPROW *)
+	mov	ecx, DCTSIZE/2				; ctr
+	alignx	16,7
+.rowloop:
+	push	edi					; save the position in output_buf; restored at .nextrow
+	mov	edi, JSAMPROW [edi]			; (JSAMPLE *)
+	add	edi, JDIMENSION [output_col(ebp)]	; edi=outptr
+
+%ifndef NO_ZERO_ROW_TEST
+	mov	eax, INT [ROW(1,esi,SIZEOF_INT)]
+	or	eax, INT [ROW(2,esi,SIZEOF_INT)]
+	jnz	short .rowDCT
+
+	mov	eax, INT [ROW(3,esi,SIZEOF_INT)]	; element 4 is skipped: pass 1 never wrote it
+	mov	ebx, INT [ROW(5,esi,SIZEOF_INT)]
+	or	eax, INT [ROW(6,esi,SIZEOF_INT)]
+	or	ebx, INT [ROW(7,esi,SIZEOF_INT)]
+	or	eax,ebx
+	jnz	short .rowDCT
+
+	; -- AC terms all zero
+
+	mov	eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+	mov	edx, POINTER [range_limit]	; (JSAMPLE *)
+
+	descale	eax,(PASS1_BITS+3)
+	and	eax,RANGE_MASK			; mask the value to form the table index
+	mov	al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]
+	mov	JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+	mov	JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+	mov	JSAMPLE [edi+2*SIZEOF_JSAMPLE], al
+	mov	JSAMPLE [edi+3*SIZEOF_JSAMPLE], al
+	jmp	near .nextrow
+	alignx	16,7
+%endif
+.rowDCT:
+	push	esi		; wsptr (esi is reused as scratch below)
+	push	ecx		; ctr
+	push	edi		; outptr
+
+	; -- Even part
+
+	mov	eax, INT [ROW(0,esi,SIZEOF_INT)]
+	mov	ebx, INT [ROW(2,esi,SIZEOF_INT)]
+	mov	ecx, INT [ROW(6,esi,SIZEOF_INT)]
+
+	imul	ebx,(F_1_847)	; ebx=MULTIPLY(z2,FIX_1_847759065)
+	imul	ecx,(-F_0_765)	; ecx=MULTIPLY(z3,-FIX_0_765366865)
+	sal	eax,(CONST_BITS+1)	; eax=tmp0
+	add	ecx,ebx		; ecx=tmp2
+
+	lea	edi,[eax+ecx]	; edi=tmp10
+	sub	eax,ecx		; eax=tmp12
+
+	push	eax		; tmp12
+	push	edi		; tmp10
+
+	; -- Odd part
+
+	mov	eax, INT [ROW(1,esi,SIZEOF_INT)]
+	mov	ebx, INT [ROW(3,esi,SIZEOF_INT)]
+	mov	ecx, INT [ROW(5,esi,SIZEOF_INT)]
+	mov	edi, INT [ROW(7,esi,SIZEOF_INT)]
+
+	mov	esi,edi		; esi=edi=z1
+	mov	edx,ecx		; edx=ecx=z2
+	imul	edi,(-F_0_211)	; edi=MULTIPLY(z1,-FIX_0_211164243)
+	imul	ecx,(F_1_451)	; ecx=MULTIPLY(z2,FIX_1_451774981)
+	imul	esi,(-F_0_509)	; esi=MULTIPLY(z1,-FIX_0_509795579)
+	imul	edx,(-F_0_601)	; edx=MULTIPLY(z2,-FIX_0_601344887)
+
+	add	edi,ecx		; edi=(tmp0)
+	add	esi,edx		; esi=(tmp2)
+
+	mov	ecx,ebx		; ecx=ebx=z3
+	mov	edx,eax		; edx=eax=z4
+	imul	ebx,(-F_2_172)	; ebx=MULTIPLY(z3,-FIX_2_172734803)
+	imul	eax,(F_1_061)	; eax=MULTIPLY(z4,FIX_1_061594337)
+	imul	ecx,(F_0_899)	; ecx=MULTIPLY(z3,FIX_0_899976223)
+	imul	edx,(F_2_562)	; edx=MULTIPLY(z4,FIX_2_562915447)
+
+	add	edi,ebx
+	add	esi,ecx
+	add	edi,eax		; edi=tmp0
+	add	esi,edx		; esi=tmp2
+
+	; -- Final output stage
+
+	pop	ebx		; ebx=tmp10
+	pop	ecx		; ecx=tmp12
+
+	lea	eax,[ebx+esi]	; eax=data0(=tmp10+tmp2)
+	sub	ebx,esi		; ebx=data3(=tmp10-tmp2)
+	lea	edx,[ecx+edi]	; edx=data1(=tmp12+tmp0)
+	sub	ecx,edi		; ecx=data2(=tmp12-tmp0)
+
+	mov	esi, POINTER [range_limit]	; (JSAMPLE *)
+
+	descale	eax,(CONST_BITS+PASS1_BITS+3+1)
+	descale	ebx,(CONST_BITS+PASS1_BITS+3+1)
+	descale	edx,(CONST_BITS+PASS1_BITS+3+1)
+	descale	ecx,(CONST_BITS+PASS1_BITS+3+1)
+
+	pop	edi		; outptr
+
+	and	eax,RANGE_MASK	; mask each value to form a table index
+	and	ebx,RANGE_MASK
+	and	edx,RANGE_MASK
+	and	ecx,RANGE_MASK
+
+	mov	al, JSAMPLE [esi+eax*SIZEOF_JSAMPLE]
+	mov	bl, JSAMPLE [esi+ebx*SIZEOF_JSAMPLE]
+	mov	dl, JSAMPLE [esi+edx*SIZEOF_JSAMPLE]
+	mov	cl, JSAMPLE [esi+ecx*SIZEOF_JSAMPLE]
+
+	mov	JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+	mov	JSAMPLE [edi+3*SIZEOF_JSAMPLE], bl
+	mov	JSAMPLE [edi+1*SIZEOF_JSAMPLE], dl
+	mov	JSAMPLE [edi+2*SIZEOF_JSAMPLE], cl
+
+	pop	ecx		; ctr
+	pop	esi		; wsptr
+
+.nextrow:
+	pop	edi				; position in output_buf saved at .rowloop
+	add	esi, byte DCTSIZE*SIZEOF_INT	; advance pointer to next row
+	add	edi, byte SIZEOF_JSAMPROW
+	dec	ecx
+	jnz	near .rowloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	mov	esp,ebp		; discard the locals
+	pop	ebp
+	ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                JCOEFPTR coef_block,
+;                JSAMPARRAY output_buf, JDIMENSION output_col)
+; ebp, ebx, esi and edi are saved and restored; eax, ecx and edx are clobbered.
+
+%define cinfo(b)	(b)+8		; j_decompress_ptr cinfo
+%define compptr(b)	(b)+12		; jpeg_component_info * compptr
+%define coef_block(b)	(b)+16		; JCOEFPTR coef_block
+%define output_buf(b)	(b)+20		; JSAMPARRAY output_buf
+%define output_col(b)	(b)+24		; JDIMENSION output_col
+; Locals below the frame pointer: the range_limit pointer, then the work array.
+%define range_limit	ebp-SIZEOF_POINTER	; JSAMPLE * range_limit
+%define workspace	range_limit-(DCTSIZE*2)*SIZEOF_INT
+					; int workspace[DCTSIZE*2]
+
+	align	16
+	global	EXTN(jpeg_idct_2x2)
+
+EXTN(jpeg_idct_2x2):
+	push	ebp
+	mov	ebp,esp
+	lea	esp, [workspace]	; reserve the locals; esp = lowest address of workspace
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	; ---- Pass 1: process columns from input, store into work array.
+
+	mov	edx, POINTER [compptr(ebp)]
+	mov	edx, POINTER [jcompinfo_dct_table(edx)]	; quantptr
+	mov	esi, JCOEFPTR [coef_block(ebp)]		; inptr
+	lea	edi, [workspace]			; int * wsptr
+	mov	ecx, DCTSIZE				; ctr (counts down; column index = DCTSIZE - ctr)
+	alignx	16,7
+.columnloop:
+	; Don't bother to process columns 2,4,6
+	test	ecx, 0x09		; bits 0 and 3 are both clear only for ctr = 6, 4, 2
+	jz	near .nextcolumn
+
+	mov	ax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+	or	ax, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+	jnz	short .columnDCT
+
+	mov	ax, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+	or	ax, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+	jnz	short .columnDCT
+
+	; -- AC terms all zero; we need not examine terms 2,4,6 for 2x2 output
+
+	mov	ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+	imul	ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]	; dequantize the DC term (16-bit multiply)
+	cwde				; sign-extend ax into eax
+
+	sal	eax, PASS1_BITS		; same scaling as the .columnDCT outputs
+
+	mov	INT [COL(0,edi,SIZEOF_INT)], eax	; both outputs of this column equal the scaled DC term
+	mov	INT [COL(1,edi,SIZEOF_INT)], eax
+	jmp	short .nextcolumn
+	alignx	16,7
+
+.columnDCT:
+	push	ecx		; ctr
+	push	edi		; wsptr (ecx and edi are reused as scratch below)
+
+	; -- Odd part
+
+	movsx	eax, JCOEF [COL(1,esi,SIZEOF_JCOEF)]
+	movsx	ebx, JCOEF [COL(3,esi,SIZEOF_JCOEF)]
+	imul	ax, ISLOW_MULT_TYPE [COL(1,edx,SIZEOF_ISLOW_MULT_TYPE)]	; dequantize: 16-bit multiply, only the low word of eax is updated
+	imul	bx, ISLOW_MULT_TYPE [COL(3,edx,SIZEOF_ISLOW_MULT_TYPE)]
+	movsx	ecx, JCOEF [COL(5,esi,SIZEOF_JCOEF)]
+	movsx	edi, JCOEF [COL(7,esi,SIZEOF_JCOEF)]
+	imul	cx, ISLOW_MULT_TYPE [COL(5,edx,SIZEOF_ISLOW_MULT_TYPE)]
+	imul	di, ISLOW_MULT_TYPE [COL(7,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+	imul	eax,(F_3_624)	; eax=MULTIPLY(data1,FIX_3_624509785)
+	imul	ebx,(-F_1_272)	; ebx=MULTIPLY(data3,-FIX_1_272758580)
+	imul	ecx,(F_0_850)	; ecx=MULTIPLY(data5,FIX_0_850430095)
+	imul	edi,(-F_0_720)	; edi=MULTIPLY(data7,-FIX_0_720959822)
+
+	add	eax,ebx
+	add	ecx,edi
+	add	ecx,eax		; ecx=tmp0
+
+	; -- Even part
+
+	mov	ax, JCOEF [COL(0,esi,SIZEOF_JCOEF)]
+	imul	ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+	cwde
+
+	sal	eax,(CONST_BITS+2)	; eax=tmp10
+
+	; -- Final output stage
+
+	pop	edi		; wsptr
+
+	lea	ebx,[eax+ecx]	; ebx=data0(=tmp10+tmp0)
+	sub	eax,ecx		; eax=data1(=tmp10-tmp0)
+
+	pop	ecx		; ctr
+
+	descale	ebx,(CONST_BITS-PASS1_BITS+2)	; results keep PASS1_BITS fraction bits for pass 2
+	descale	eax,(CONST_BITS-PASS1_BITS+2)
+
+	mov	INT [COL(0,edi,SIZEOF_INT)], ebx
+	mov	INT [COL(1,edi,SIZEOF_INT)], eax
+
+.nextcolumn:
+	add	esi, byte SIZEOF_JCOEF	; advance pointers to next column
+	add	edx, byte SIZEOF_ISLOW_MULT_TYPE
+	add	edi, byte SIZEOF_INT
+	dec	ecx
+	jnz	near .columnloop
+
+	; ---- Pass 2: process 2 rows from work array, store into output array.
+
+	mov	eax, POINTER [cinfo(ebp)]
+	mov	eax, POINTER [jdstruct_sample_range_limit(eax)]
+	sub	eax, byte -CENTERJSAMPLE*SIZEOF_JSAMPLE	; JSAMPLE * range_limit (i.e. table pointer + CENTERJSAMPLE samples)
+	mov	POINTER [range_limit], eax
+
+	lea	esi, [workspace]			; int * wsptr
+	mov	edi, JSAMPARRAY [output_buf(ebp)]	; (JSAMPROW *)
+	mov	ecx, DCTSIZE/4				; ctr
+	alignx	16,7
+.rowloop:
+	push	edi					; save the position in output_buf; restored at .nextrow
+	mov	edi, JSAMPROW [edi]			; (JSAMPLE *)
+	add	edi, JDIMENSION [output_col(ebp)]	; edi=outptr
+
+%ifndef NO_ZERO_ROW_TEST
+	mov	eax, INT [ROW(1,esi,SIZEOF_INT)]	; elements 2,4,6 are skipped: pass 1 never wrote them
+	or	eax, INT [ROW(3,esi,SIZEOF_INT)]
+	jnz	short .rowDCT
+
+	mov	eax, INT [ROW(5,esi,SIZEOF_INT)]
+	or	eax, INT [ROW(7,esi,SIZEOF_INT)]
+	jnz	short .rowDCT
+
+	; -- AC terms all zero
+
+	mov	eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+	mov	edx, POINTER [range_limit]	; (JSAMPLE *)
+
+	descale	eax,(PASS1_BITS+3)
+	and	eax,RANGE_MASK			; mask the value to form the table index
+	mov	al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]
+	mov	JSAMPLE [edi+0*SIZEOF_JSAMPLE], al
+	mov	JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+	jmp	short .nextrow
+	alignx	16,7
+%endif
+.rowDCT:
+	push	ecx		; ctr (ecx is reused as scratch below)
+
+	; -- Odd part
+
+	mov	eax, INT [ROW(1,esi,SIZEOF_INT)]
+	mov	ebx, INT [ROW(3,esi,SIZEOF_INT)]
+	mov	ecx, INT [ROW(5,esi,SIZEOF_INT)]
+	mov	edx, INT [ROW(7,esi,SIZEOF_INT)]
+
+	imul	eax,(F_3_624)	; eax=MULTIPLY(data1,FIX_3_624509785)
+	imul	ebx,(-F_1_272)	; ebx=MULTIPLY(data3,-FIX_1_272758580)
+	imul	ecx,(F_0_850)	; ecx=MULTIPLY(data5,FIX_0_850430095)
+	imul	edx,(-F_0_720)	; edx=MULTIPLY(data7,-FIX_0_720959822)
+
+	add	eax,ebx
+	add	ecx,edx
+	add	ecx,eax		; ecx=tmp0
+
+	; -- Even part
+
+	mov	eax, INT [ROW(0,esi,SIZEOF_INT)]
+
+	sal	eax,(CONST_BITS+2)	; eax=tmp10
+
+	; -- Final output stage
+
+	mov	edx, POINTER [range_limit]	; (JSAMPLE *)
+
+	lea	ebx,[eax+ecx]	; ebx=data0(=tmp10+tmp0)
+	sub	eax,ecx		; eax=data1(=tmp10-tmp0)
+
+	pop	ecx		; ctr
+
+	descale	ebx,(CONST_BITS+PASS1_BITS+3+2)
+	descale	eax,(CONST_BITS+PASS1_BITS+3+2)
+
+	and	ebx,RANGE_MASK	; mask each value to form a table index
+	and	eax,RANGE_MASK
+	mov	bl, JSAMPLE [edx+ebx*SIZEOF_JSAMPLE]
+	mov	al, JSAMPLE [edx+eax*SIZEOF_JSAMPLE]
+	mov	JSAMPLE [edi+0*SIZEOF_JSAMPLE], bl
+	mov	JSAMPLE [edi+1*SIZEOF_JSAMPLE], al
+
+.nextrow:
+	pop	edi				; position in output_buf saved at .rowloop
+	add	esi, byte DCTSIZE*SIZEOF_INT	; advance pointer to next row
+	add	edi, byte SIZEOF_JSAMPROW
+	dec	ecx
+	jnz	near .rowloop
+
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	mov	esp,ebp		; discard the locals
+	pop	ebp
+	ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 1x1 output block.
+;
+; GLOBAL(void)
+; jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+;                JCOEFPTR coef_block,
+;                JSAMPARRAY output_buf, JDIMENSION output_col)
+; No stack frame is built and only eax, ecx and edx are written.
+
+%define cinfo(b)	(b)+8		; j_decompress_ptr cinfo
+%define compptr(b)	(b)+12		; jpeg_component_info * compptr
+%define coef_block(b)	(b)+16		; JCOEFPTR coef_block
+%define output_buf(b)	(b)+20		; JSAMPARRAY output_buf
+%define output_col(b)	(b)+24		; JDIMENSION output_col
+
+%define ebp	esp-4		; use esp instead of ebp: nothing is pushed, so (ebp)+8 must resolve to esp+4
+
+	align	16
+	global	EXTN(jpeg_idct_1x1)
+
+EXTN(jpeg_idct_1x1):
+;	push	ebp
+;	mov	ebp,esp
+;	push	ebx		; unused
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+;	push	esi		; unused
+;	push	edi		; unused
+
+	; We hardly need an inverse DCT routine for this: just take the
+	; average pixel value, which is one-eighth of the DC coefficient.
+
+	mov	edx, POINTER [compptr(ebp)]
+	mov	ecx, JCOEFPTR [coef_block(ebp)]		; inptr
+	mov	edx, POINTER [jcompinfo_dct_table(edx)]	; quantptr
+
+	mov	ax, JCOEF [COL(0,ecx,SIZEOF_JCOEF)]
+	imul	ax, ISLOW_MULT_TYPE [COL(0,edx,SIZEOF_ISLOW_MULT_TYPE)]	; dequantize the DC term (16-bit multiply)
+
+	mov	ecx, JSAMPARRAY [output_buf(ebp)]	; (JSAMPROW *)
+	mov	edx, JDIMENSION [output_col(ebp)]
+	mov	ecx, JSAMPROW [ecx]			; (JSAMPLE *)
+
+	add	ax, (1 << (3-1)) + (CENTERJSAMPLE << 3)	; rounding term for the >>3, plus the CENTERJSAMPLE level shift pre-scaled by 8
+	sar	ax,3		; descale
+
+	test	ah,ah		; unsigned saturation: ah == 0 means ax is already 0..255
+	jz	short .output
+	not	ax		; negative ax becomes non-negative, ax > 255 becomes negative
+	sar	ax,15		; fill with the sign bit: al = 0x00 (was negative) or 0xFF (was > 255)
+	alignx	16,3
+.output:
+	mov	JSAMPLE [ecx+edx*SIZEOF_JSAMPLE], al	; output_buf[0][output_col] = al
+
+;	pop	edi		; unused
+;	pop	esi		; unused
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+;	pop	ebx		; unused
+;	pop	ebp
+	ret
+
+%endif	; IDCT_SCALING_SUPPORTED
diff --git a/jimmxfst.asm b/jimmxfst.asm
new file mode 100644
index 0000000..de0def6
--- /dev/null
+++ b/jimmxfst.asm
@@ -0,0 +1,510 @@
+;
+; jimmxfst.asm - fast integer IDCT (MMX)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the inverse DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jidctfst.c; see jidctfst.c
+; for more details.
+;
+; Last Modified : February 4, 2006
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+%ifdef DCT_IFAST_SUPPORTED
+%ifdef JIDCT_INT_MMX_SUPPORTED
+
+; This module is specialized to the case DCTSIZE = 8.
+;
+%if DCTSIZE != 8
+%error "Sorry, this code only copes with 8x8 DCTs."
+%endif
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS	8	; 14 is also OK.  (fraction bits of the F_x_xxx constants)
+%define PASS1_BITS	2
+
+%if IFAST_SCALE_BITS != PASS1_BITS
+%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
+%endif
+; Precomputed table for the default CONST_BITS: F_x_xxx = x * 2^8, rounded.
+%if CONST_BITS == 8
+F_1_082	equ	277		; FIX(1.082392200)
+F_1_414	equ	362		; FIX(1.414213562)
+F_1_847	equ	473		; FIX(1.847759065)
+F_2_613	equ	669		; FIX(2.613125930)
+F_1_613	equ	(F_2_613 - 256)	; FIX(2.613125930) - FIX(1)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value is written as an integer scaled by 2^30 and rounded down to CONST_BITS fraction bits.
+%define	DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))	; x / 2^n, rounded to nearest
+F_1_082	equ	DESCALE(1162209775,30-CONST_BITS)	; FIX(1.082392200)
+F_1_414	equ	DESCALE(1518500249,30-CONST_BITS)	; FIX(1.414213562)
+F_1_847	equ	DESCALE(1984016188,30-CONST_BITS)	; FIX(1.847759065)
+F_2_613	equ	DESCALE(2805822602,30-CONST_BITS)	; FIX(2.613125930)
+F_1_613	equ	(F_2_613 - (1 << CONST_BITS))	; FIX(2.613125930) - FIX(1)
+%endif
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_CONST
+
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+; pmulhw keeps the high 16 bits of each 16x16 product, so (x << PRE_MULTIPLY_SCALE_BITS) times (F << CONST_SHIFT) yields (x * F) >> CONST_BITS.
+%define PRE_MULTIPLY_SCALE_BITS   2
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
+
+	alignz	16
+	global	EXTN(jconst_idct_ifast_mmx)
+
+EXTN(jconst_idct_ifast_mmx):
+
+PW_F1414	times 4 dw  F_1_414 << CONST_SHIFT	; each PW_ entry is four identical words = one 64-bit MMX operand
+PW_F1847	times 4 dw  F_1_847 << CONST_SHIFT
+PW_MF1613	times 4 dw -F_1_613 << CONST_SHIFT	; stored negated: -(2.613125930 - 1)
+PW_F1082	times 4 dw  F_1_082 << CONST_SHIFT
+PB_CENTERJSAMP	times 8 db  CENTERJSAMPLE	; eight identical bytes
+
+	alignz	16
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+; +; GLOBAL(void) +; jpeg_idct_ifast_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_ifast_mmx) + +EXTN(jpeg_idct_ifast_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ +; mov eax, [original_ebp] + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF * wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD 
[MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 + psubw mm0,mm2 ; mm0=tmp11 + psubw mm1,mm3 + paddw mm4,mm2 ; mm4=tmp10 + paddw mm5,mm3 ; mm5=tmp13 + + psllw mm1,PRE_MULTIPLY_SCALE_BITS + pmulhw mm1,[GOTOFF(ebx,PW_F1414)] + psubw mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + psubw mm4,mm5 ; mm4=tmp3 + psubw mm0,mm1 ; mm0=tmp2 + paddw mm6,mm5 ; mm6=tmp0 + paddw mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 + psubw mm2,mm1 ; mm2=z12 + psubw mm5,mm3 ; mm5=z10 + paddw mm4,mm1 ; mm4=z11 + paddw mm0,mm3 ; mm0=z13 + + movq mm1,mm5 ; mm1=z10(unscaled) + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm5,PRE_MULTIPLY_SCALE_BITS + + movq mm3,mm4 + psubw mm4,mm0 + paddw mm3,mm0 ; mm3=tmp7 + + psllw mm4,PRE_MULTIPLY_SCALE_BITS + pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... 
+ ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0,mm5 + paddw mm5,mm2 + pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw mm2,[GOTOFF(ebx,PW_F1082)] + psubw mm0,mm1 + psubw mm2,mm5 ; mm2=tmp10 + paddw mm0,mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0,mm3 ; mm0=tmp6 + movq mm1,mm6 + movq mm5,mm7 + paddw mm6,mm3 ; mm6=data0=(00 01 02 03) + paddw mm7,mm0 ; mm7=data1=(10 11 12 13) + psubw mm1,mm3 ; mm1=data7=(70 71 72 73) + psubw mm5,mm0 ; mm5=data6=(60 61 62 63) + psubw mm4,mm0 ; mm4=tmp5 + + movq mm3,mm6 ; transpose coefficients(phase 1) + punpcklwd mm6,mm7 ; mm6=(00 10 01 11) + punpckhwd mm3,mm7 ; mm3=(02 12 03 13) + movq mm0,mm5 ; transpose coefficients(phase 1) + punpcklwd mm5,mm1 ; mm5=(60 70 61 71) + punpckhwd mm0,mm1 ; mm0=(62 72 63 73) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp2 + movq mm1, MMWORD [wk(1)] ; mm1=tmp3 + + movq MMWORD [wk(0)], mm5 ; wk(0)=(60 70 61 71) + movq MMWORD [wk(1)], mm0 ; wk(1)=(62 72 63 73) + + paddw mm2,mm4 ; mm2=tmp4 + movq mm5,mm7 + movq mm0,mm1 + paddw mm7,mm4 ; mm7=data2=(20 21 22 23) + paddw mm1,mm2 ; mm1=data4=(40 41 42 43) + psubw mm5,mm4 ; mm5=data5=(50 51 52 53) + psubw mm0,mm2 ; mm0=data3=(30 31 32 33) + + movq mm4,mm7 ; transpose coefficients(phase 1) + punpcklwd mm7,mm0 ; mm7=(20 30 21 31) + punpckhwd mm4,mm0 ; mm4=(22 32 23 33) + movq mm2,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm5 ; mm1=(40 50 41 51) + punpckhwd mm2,mm5 ; mm2=(42 52 43 53) + + movq mm0,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm7 ; mm6=(00 10 20 30) + punpckhdq mm0,mm7 ; mm0=(01 11 21 31) + movq mm5,mm3 ; transpose coefficients(phase 2) + punpckldq mm3,mm4 ; mm3=(02 12 22 32) + punpckhdq mm5,mm4 ; mm5=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=(60 70 61 71) + movq mm4, MMWORD [wk(1)] ; mm4=(62 72 63 73) + + movq MMWORD 
[MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm6,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm7 ; mm1=(40 50 60 70) + punpckhdq mm6,mm7 ; mm6=(41 51 61 71) + movq mm0,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm4 ; mm2=(42 52 62 72) + punpckhdq mm0,mm4 ; mm0=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_IFAST_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF * wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm0 + movq mm5,mm1 + psubw mm0,mm2 ; mm0=tmp11 + psubw mm1,mm3 + paddw mm4,mm2 ; mm4=tmp10 + paddw mm5,mm3 ; mm5=tmp13 + + psllw mm1,PRE_MULTIPLY_SCALE_BITS + pmulhw mm1,[GOTOFF(ebx,PW_F1414)] + psubw mm1,mm5 ; mm1=tmp12 + + movq mm6,mm4 + movq mm7,mm0 + psubw mm4,mm5 ; mm4=tmp3 + psubw mm0,mm1 ; mm0=tmp2 + paddw mm6,mm5 ; mm6=tmp0 + paddw mm7,mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + 
movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm2 + movq mm0,mm5 + psubw mm2,mm1 ; mm2=z12 + psubw mm5,mm3 ; mm5=z10 + paddw mm4,mm1 ; mm4=z11 + paddw mm0,mm3 ; mm0=z13 + + movq mm1,mm5 ; mm1=z10(unscaled) + psllw mm2,PRE_MULTIPLY_SCALE_BITS + psllw mm5,PRE_MULTIPLY_SCALE_BITS + + movq mm3,mm4 + psubw mm4,mm0 + paddw mm3,mm0 ; mm3=tmp7 + + psllw mm4,PRE_MULTIPLY_SCALE_BITS + pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0,mm5 + paddw mm5,mm2 + pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw mm2,[GOTOFF(ebx,PW_F1082)] + psubw mm0,mm1 + psubw mm2,mm5 ; mm2=tmp10 + paddw mm0,mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0,mm3 ; mm0=tmp6 + movq mm1,mm6 + movq mm5,mm7 + paddw mm6,mm3 ; mm6=data0=(00 10 20 30) + paddw mm7,mm0 ; mm7=data1=(01 11 21 31) + psraw mm6,(PASS1_BITS+3) ; descale + psraw mm7,(PASS1_BITS+3) ; descale + psubw mm1,mm3 ; mm1=data7=(07 17 27 37) + psubw mm5,mm0 ; mm5=data6=(06 16 26 36) + psraw mm1,(PASS1_BITS+3) ; descale + psraw mm5,(PASS1_BITS+3) ; descale + psubw mm4,mm0 ; mm4=tmp5 + + packsswb mm6,mm5 ; mm6=(00 10 20 30 06 16 26 36) + packsswb mm7,mm1 ; mm7=(01 11 21 31 07 17 27 37) + + movq mm3, MMWORD [wk(0)] ; mm3=tmp2 + movq mm0, MMWORD [wk(1)] ; mm0=tmp3 + + paddw mm2,mm4 ; mm2=tmp4 + movq mm5,mm3 + movq mm1,mm0 + paddw mm3,mm4 ; mm3=data2=(02 12 22 32) + paddw mm0,mm2 ; mm0=data4=(04 14 24 34) + psraw mm3,(PASS1_BITS+3) ; descale + psraw mm0,(PASS1_BITS+3) ; descale + psubw mm5,mm4 ; mm5=data5=(05 15 25 35) + psubw mm1,mm2 ; mm1=data3=(03 13 23 33) + psraw mm5,(PASS1_BITS+3) ; descale + psraw mm1,(PASS1_BITS+3) ; descale + + movq mm4,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm4=[PB_CENTERJSAMP] + + packsswb mm3,mm0 ; mm3=(02 12 22 32 04 14 24 34) + packsswb mm1,mm5 ; mm1=(03 13 23 33 
05 15 25 35) + + paddb mm6,mm4 + paddb mm7,mm4 + paddb mm3,mm4 + paddb mm1,mm4 + + movq mm2,mm6 ; transpose coefficients(phase 1) + punpcklbw mm6,mm7 ; mm6=(00 01 10 11 20 21 30 31) + punpckhbw mm2,mm7 ; mm2=(06 07 16 17 26 27 36 37) + movq mm0,mm3 ; transpose coefficients(phase 1) + punpcklbw mm3,mm1 ; mm3=(02 03 12 13 22 23 32 33) + punpckhbw mm0,mm1 ; mm0=(04 05 14 15 24 25 34 35) + + movq mm5,mm6 ; transpose coefficients(phase 2) + punpcklwd mm6,mm3 ; mm6=(00 01 02 03 10 11 12 13) + punpckhwd mm5,mm3 ; mm5=(20 21 22 23 30 31 32 33) + movq mm4,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm2 ; mm0=(04 05 06 07 14 15 16 17) + punpckhwd mm4,mm2 ; mm4=(24 25 26 27 34 35 36 37) + + movq mm7,mm6 ; transpose coefficients(phase 3) + punpckldq mm6,mm0 ; mm6=(00 01 02 03 04 05 06 07) + punpckhdq mm7,mm0 ; mm7=(10 11 12 13 14 15 16 17) + movq mm1,mm5 ; transpose coefficients(phase 3) + punpckldq mm5,mm4 ; mm5=(20 21 22 23 24 25 26 27) + punpckhdq mm1,mm4 ; mm1=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JIDCT_INT_MMX_SUPPORTED +%endif ; DCT_IFAST_SUPPORTED diff --git a/jimmxint.asm b/jimmxint.asm new file mode 100644 index 0000000..2a33a63 --- /dev/null +++ b/jimmxint.asm @@ -0,0 +1,862 @@ +; +; jimmxint.asm - accurate integer IDCT 
(MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; and can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see jidctint.c for +; more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_ISLOW_SUPPORTED +%ifdef JIDCT_INT_MMX_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 ; fractional bits carried by the F_x_xxx multipliers +%define PASS1_BITS 2 ; extra fractional bits kept in the pass-1 output + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) ; right shift applied at the end of pass 1 +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) ; right shift applied at the end of pass 2 + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants, so each literal below is the constant pre-multiplied by 2^30 and DESCALE() rounds it to CONST_BITS fractional bits.
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_mmx) + +EXTN(jconst_idct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 ; pmaddwd pair: (z2,z3) -> even-part tmp3 +PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) ; pmaddwd pair: (z2,z3) -> even-part tmp2 +PW_MF078_F117 times 2 dw (F_1_175-F_1_961), F_1_175 ; pmaddwd pair: (z3,z4) -> z3 +PW_F117_F078 times 2 dw F_1_175, (F_1_175-F_0_390) ; pmaddwd pair: (z3,z4) -> z4 +PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 ; pmaddwd pair: (tmp0,tmp3) -> odd-part tmp0 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) ; pmaddwd pair: (tmp0,tmp3) -> odd-part tmp3 +PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 ; pmaddwd pair: (tmp1,tmp2) -> odd-part tmp1 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) ; pmaddwd pair: (tmp1,tmp2) -> odd-part tmp2 +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) ; rounding bias added before psrad DESCALE_P1 +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) ; rounding bias added before psrad DESCALE_P2 +PB_CENTERJSAMP times 8 db CENTERJSAMPLE ; added with paddb to the packed output bytes + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients.
+; +; GLOBAL(void) +; jpeg_idct_islow_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; Arguments are read relative to the frame pointer stored at [original_ebp]; ebx, esi and edi are saved and restored. + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 ; slot where the prologue stores the unaligned frame pointer +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 12 ; wk(0)..wk(11) are used as spill slots below +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_islow_mmx) + +EXTN(jpeg_idct_islow_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 ; make room for the saved pointer + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax ; [original_ebp] = eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] ; reserve wk[] and workspace[] below ebp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array.
+ +; mov eax, [original_ebp] + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF * wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero (each column becomes its scaled DC term, replicated 8 times) + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS ; match the scaling of the full pass-1 result + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +
pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4,mm1 ; mm1=in2=z2 + movq mm5,mm1 + punpcklwd mm4,mm3 ; mm3=in6=z3 + punpckhwd mm5,mm3 + movq mm1,mm4 + movq mm3,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6,mm0 + paddw mm0,mm2 ; mm0=in0+in4 + psubw mm6,mm2 ; mm6=in0-in4 + + pxor mm7,mm7 ; zeroed so punpck*wd puts each word in the upper half of a dword + pxor mm2,mm2 + punpcklwd mm7,mm0 ; mm7=tmp0L + punpckhwd mm2,mm0 ; mm2=tmp0H + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0,mm7 + paddd mm7,mm4 ; mm7=tmp10L + psubd mm0,mm4 ; mm0=tmp13L + movq mm4,mm2 + paddd mm2,mm5 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5,mm5 + pxor mm7,mm7 + punpcklwd mm5,mm6 ; mm5=tmp1L + punpckhwd mm7,mm6 ; mm7=tmp1H + psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2,mm5 + paddd mm5,mm1 ; mm5=tmp11L + psubd mm2,mm1 ; mm2=tmp12L + movq mm0,mm7 + paddd mm7,mm3 ; mm7=tmp11H + psubd mm0,mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H +
movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ; mm4=in1 (tmp3 below) + pmullw mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ; mm6=in3 (tmp2 below) + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ; mm1=in5 (tmp1 below) + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ; mm3=in7 (tmp0 below) + + movq mm5,mm6 + movq mm7,mm4 + paddw mm5,mm3 ; mm5=z3 + paddw mm7,mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2,mm5 + movq mm0,mm5 + punpcklwd mm2,mm7 + punpckhwd mm0,mm7 + movq mm5,mm2 + movq mm7,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movq mm2,mm3 + movq
mm0,mm3 + punpcklwd mm2,mm4 + punpckhwd mm0,mm4 + movq mm3,mm2 + movq mm4,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2,mm1 + movq mm0,mm1 + punpcklwd mm2,mm6 + punpckhwd mm0,mm6 + movq mm1,mm2 + movq mm6,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2,mm5 + movq mm0,mm7 + paddd mm5,mm3 ; mm5=data0L + paddd mm7,mm4 ; mm7=data0H + psubd mm2,mm3 ; mm2=data7L + psubd mm0,mm4 ; mm0=data7H + + movq mm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1] + + paddd mm5,mm3 ; add rounding bias, then shift + paddd mm7,mm3 + psrad mm5,DESCALE_P1 + psrad mm7,DESCALE_P1 + paddd mm2,mm3 + paddd mm0,mm3 + psrad mm2,DESCALE_P1 + psrad mm0,DESCALE_P1 + + packssdw mm5,mm7 ; mm5=data0=(00 01 02 03) + packssdw mm2,mm0 ; mm2=data7=(70 71 72 73) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7,mm4 + movq mm0,mm3 + paddd mm4,mm1 ; mm4=data1L + paddd mm3,mm6 ; mm3=data1H + psubd mm7,mm1 ; mm7=data6L + psubd mm0,mm6 ; mm0=data6H + + movq mm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm1=[PD_DESCALE_P1] + + paddd mm4,mm1 + paddd mm3,mm1 + psrad mm4,DESCALE_P1 + psrad
mm3,DESCALE_P1 + paddd mm7,mm1 + paddd mm0,mm1 + psrad mm7,DESCALE_P1 + psrad mm0,DESCALE_P1 + + packssdw mm4,mm3 ; mm4=data1=(10 11 12 13) + packssdw mm7,mm0 ; mm7=data6=(60 61 62 63) + + movq mm6,mm5 ; transpose coefficients(phase 1) + punpcklwd mm5,mm4 ; mm5=(00 10 01 11) + punpckhwd mm6,mm4 ; mm6=(02 12 03 13) + movq mm1,mm7 ; transpose coefficients(phase 1) + punpcklwd mm7,mm2 ; mm7=(60 70 61 71) + punpckhwd mm1,mm2 ; mm1=(62 72 63 73) + + movq mm3, MMWORD [wk(6)] ; mm3=tmp12L + movq mm0, MMWORD [wk(7)] ; mm0=tmp12H + movq mm4, MMWORD [wk(10)] ; mm4=tmp1L + movq mm2, MMWORD [wk(11)] ; mm2=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 01 11) + movq MMWORD [wk(1)], mm6 ; wk(1)=(02 12 03 13) + movq MMWORD [wk(4)], mm7 ; wk(4)=(60 70 61 71) + movq MMWORD [wk(5)], mm1 ; wk(5)=(62 72 63 73) + + movq mm5,mm3 + movq mm6,mm0 + paddd mm3,mm4 ; mm3=data2L + paddd mm0,mm2 ; mm0=data2H + psubd mm5,mm4 ; mm5=data5L + psubd mm6,mm2 ; mm6=data5H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1] + + paddd mm3,mm7 + paddd mm0,mm7 + psrad mm3,DESCALE_P1 + psrad mm0,DESCALE_P1 + paddd mm5,mm7 + paddd mm6,mm7 + psrad mm5,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm3,mm0 ; mm3=data2=(20 21 22 23) + packssdw mm5,mm6 ; mm5=data5=(50 51 52 53) + + movq mm1, MMWORD [wk(2)] ; mm1=tmp13L + movq mm4, MMWORD [wk(3)] ; mm4=tmp13H + movq mm2, MMWORD [wk(8)] ; mm2=tmp0L + movq mm7, MMWORD [wk(9)] ; mm7=tmp0H + + movq mm0,mm1 + movq mm6,mm4 + paddd mm1,mm2 ; mm1=data3L + paddd mm4,mm7 ; mm4=data3H + psubd mm0,mm2 ; mm0=data4L + psubd mm6,mm7 ; mm6=data4H + + movq mm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1] + + paddd mm1,mm2 + paddd mm4,mm2 + psrad mm1,DESCALE_P1 + psrad mm4,DESCALE_P1 + paddd mm0,mm2 + paddd mm6,mm2 + psrad mm0,DESCALE_P1 + psrad mm6,DESCALE_P1 + + packssdw mm1,mm4 ; mm1=data3=(30 31 32 33) + packssdw mm0,mm6 ; mm0=data4=(40 41 42 43) + + movq mm7, MMWORD [wk(0)] ; mm7=(00 10 01 11) + movq mm2, MMWORD [wk(1)] ; mm2=(02 12 03 13) + + movq mm4,mm3
; transpose coefficients(phase 1) + punpcklwd mm3,mm1 ; mm3=(20 30 21 31) + punpckhwd mm4,mm1 ; mm4=(22 32 23 33) + movq mm6,mm0 ; transpose coefficients(phase 1) + punpcklwd mm0,mm5 ; mm0=(40 50 41 51) + punpckhwd mm6,mm5 ; mm6=(42 52 43 53) + + movq mm1,mm7 ; transpose coefficients(phase 2) + punpckldq mm7,mm3 ; mm7=(00 10 20 30) + punpckhdq mm1,mm3 ; mm1=(01 11 21 31) + movq mm5,mm2 ; transpose coefficients(phase 2) + punpckldq mm2,mm4 ; mm2=(02 12 22 32) + punpckhdq mm5,mm4 ; mm5=(03 13 23 33) + + movq mm3, MMWORD [wk(4)] ; mm3=(60 70 61 71) + movq mm4, MMWORD [wk(5)] ; mm4=(62 72 63 73) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm7,mm0 ; transpose coefficients(phase 2) + punpckldq mm0,mm3 ; mm0=(40 50 60 70) + punpckhdq mm7,mm3 ; mm7=(41 51 61 71) + movq mm1,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm4 ; mm6=(42 52 62 72) + punpckhdq mm1,mm4 ; mm1=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array.
+ + mov eax, [original_ebp] ; reload: eax may have been clobbered by the zero-column test + lea esi, [workspace] ; JCOEF * wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] ; eax = output_col from here on + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4,mm1 ; mm1=in2=z2 + movq mm5,mm1 + punpcklwd mm4,mm3 ; mm3=in6=z3 + punpckhwd mm5,mm3 + movq mm1,mm4 + movq mm3,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6,mm0 + paddw mm0,mm2 ; mm0=in0+in4 + psubw mm6,mm2 ; mm6=in0-in4 + + pxor mm7,mm7 + pxor mm2,mm2 + punpcklwd mm7,mm0 ; mm7=tmp0L + punpckhwd mm2,mm0 ; mm2=tmp0H + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0,mm7 + paddd mm7,mm4 ; mm7=tmp10L + psubd mm0,mm4 ; mm0=tmp13L + movq mm4,mm2 + paddd mm2,mm5 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5,mm5 + pxor mm7,mm7 + punpcklwd mm5,mm6 ; mm5=tmp1L + punpckhwd mm7,mm6 ; mm7=tmp1H + psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2,mm5 + paddd mm5,mm1 ; mm5=tmp11L + psubd mm2,mm1 ; mm2=tmp12L + movq mm0,mm7
+ paddd mm7,mm3 ; mm7=tmp11H + psubd mm0,mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H + movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] ; mm4=in1 (tmp3 below) + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] ; mm6=in3 (tmp2 below) + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] ; mm1=in5 (tmp1 below) + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] ; mm3=in7 (tmp0 below) + + movq mm5,mm6 + movq mm7,mm4 + paddw mm5,mm3 ; mm5=z3 + paddw mm7,mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2,mm5 + movq mm0,mm5 + punpcklwd mm2,mm7 + punpckhwd mm0,mm7 + movq mm5,mm2 + movq mm7,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movq mm2,mm3 + movq mm0,mm3 + punpcklwd mm2,mm4 + punpckhwd mm0,mm4 + movq mm3,mm2 + movq mm4,mm0 + pmaddwd
mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2,mm1 + movq mm0,mm1 + punpcklwd mm2,mm6 + punpckhwd mm0,mm6 + movq mm1,mm2 + movq mm6,mm0 + pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2,mm5 + movq mm0,mm7 + paddd mm5,mm3 ; mm5=data0L + paddd mm7,mm4 ; mm7=data0H + psubd mm2,mm3 ; mm2=data7L + psubd mm0,mm4 ; mm0=data7H + + movq mm3,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2] + + paddd mm5,mm3 + paddd mm7,mm3 + psrad mm5,DESCALE_P2 + psrad mm7,DESCALE_P2 + paddd mm2,mm3 + paddd mm0,mm3 + psrad mm2,DESCALE_P2 + psrad mm0,DESCALE_P2 + + packssdw mm5,mm7 ; mm5=data0=(00 10 20 30) + packssdw mm2,mm0 ; mm2=data7=(07 17 27 37) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7,mm4 + movq mm0,mm3 + paddd mm4,mm1 ; mm4=data1L + paddd mm3,mm6 ; mm3=data1H + psubd mm7,mm1 ; mm7=data6L + psubd mm0,mm6 ; mm0=data6H + + movq mm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2] + + paddd mm4,mm1 + paddd mm3,mm1 + psrad mm4,DESCALE_P2 + psrad mm3,DESCALE_P2 + paddd mm7,mm1 + paddd mm0,mm1 + psrad mm7,DESCALE_P2 + psrad
mm0,DESCALE_P2 + + packssdw mm4,mm3 ; mm4=data1=(01 11 21 31) + packssdw mm7,mm0 ; mm7=data6=(06 16 26 36) + + packsswb mm5,mm7 ; mm5=(00 10 20 30 06 16 26 36) + packsswb mm4,mm2 ; mm4=(01 11 21 31 07 17 27 37) + + movq mm6, MMWORD [wk(6)] ; mm6=tmp12L + movq mm1, MMWORD [wk(7)] ; mm1=tmp12H + movq mm3, MMWORD [wk(10)] ; mm3=tmp1L + movq mm0, MMWORD [wk(11)] ; mm0=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 20 30 06 16 26 36) + movq MMWORD [wk(1)], mm4 ; wk(1)=(01 11 21 31 07 17 27 37) + + movq mm7,mm6 + movq mm2,mm1 + paddd mm6,mm3 ; mm6=data2L + paddd mm1,mm0 ; mm1=data2H + psubd mm7,mm3 ; mm7=data5L + psubd mm2,mm0 ; mm2=data5H + + movq mm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2] + + paddd mm6,mm5 + paddd mm1,mm5 + psrad mm6,DESCALE_P2 + psrad mm1,DESCALE_P2 + paddd mm7,mm5 + paddd mm2,mm5 + psrad mm7,DESCALE_P2 + psrad mm2,DESCALE_P2 + + packssdw mm6,mm1 ; mm6=data2=(02 12 22 32) + packssdw mm7,mm2 ; mm7=data5=(05 15 25 35) + + movq mm4, MMWORD [wk(2)] ; mm4=tmp13L + movq mm3, MMWORD [wk(3)] ; mm3=tmp13H + movq mm0, MMWORD [wk(8)] ; mm0=tmp0L + movq mm5, MMWORD [wk(9)] ; mm5=tmp0H + + movq mm1,mm4 + movq mm2,mm3 + paddd mm4,mm0 ; mm4=data3L + paddd mm3,mm5 ; mm3=data3H + psubd mm1,mm0 ; mm1=data4L + psubd mm2,mm5 ; mm2=data4H + + movq mm0,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2] + + paddd mm4,mm0 + paddd mm3,mm0 + psrad mm4,DESCALE_P2 + psrad mm3,DESCALE_P2 + paddd mm1,mm0 + paddd mm2,mm0 + psrad mm1,DESCALE_P2 + psrad mm2,DESCALE_P2 + + movq mm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm5=[PB_CENTERJSAMP] + + packssdw mm4,mm3 ; mm4=data3=(03 13 23 33) + packssdw mm1,mm2 ; mm1=data4=(04 14 24 34) + + movq mm0, MMWORD [wk(0)] ; mm0=(00 10 20 30 06 16 26 36) + movq mm3, MMWORD [wk(1)] ; mm3=(01 11 21 31 07 17 27 37) + + packsswb mm6,mm1 ; mm6=(02 12 22 32 04 14 24 34) + packsswb mm4,mm7 ; mm4=(03 13 23 33 05 15 25 35) + + paddb mm0,mm5 ; add CENTERJSAMPLE to every packed byte + paddb mm3,mm5 + paddb mm6,mm5 + paddb mm4,mm5 + + movq mm2,mm0 ; transpose coefficients(phase 1) + punpcklbw
mm0,mm3 ; mm0=(00 01 10 11 20 21 30 31) + punpckhbw mm2,mm3 ; mm2=(06 07 16 17 26 27 36 37) + movq mm1,mm6 ; transpose coefficients(phase 1) + punpcklbw mm6,mm4 ; mm6=(02 03 12 13 22 23 32 33) + punpckhbw mm1,mm4 ; mm1=(04 05 14 15 24 25 34 35) + + movq mm7,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm6 ; mm0=(00 01 02 03 10 11 12 13) + punpckhwd mm7,mm6 ; mm7=(20 21 22 23 30 31 32 33) + movq mm5,mm1 ; transpose coefficients(phase 2) + punpcklwd mm1,mm2 ; mm1=(04 05 06 07 14 15 16 17) + punpckhwd mm5,mm2 ; mm5=(24 25 26 27 34 35 36 37) + + movq mm3,mm0 ; transpose coefficients(phase 3) + punpckldq mm0,mm1 ; mm0=(00 01 02 03 04 05 06 07) + punpckhdq mm3,mm1 ; mm3=(10 11 12 13 14 15 16 17) + movq mm4,mm7 ; transpose coefficients(phase 3) + punpckldq mm7,mm5 ; mm7=(20 21 22 23 24 25 26 27) + punpckhdq mm4,mm5 ; mm4=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; ebx reused as a row pointer between pushpic and poppic + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW ; next 4 output row pointers + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JIDCT_INT_MMX_SUPPORTED +%endif ; DCT_ISLOW_SUPPORTED diff --git a/jimmxred.asm b/jimmxred.asm new file mode 100644 index 0000000..491fa7b --- /dev/null +++ b/jimmxred.asm @@ -0,0 +1,719 @@ +; +; jimmxred.asm - reduced-size IDCT (MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler); it +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see jidctred.c for more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef IDCT_SCALING_SUPPORTED +%ifdef JIDCT_INT_MMX_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 ; fraction bits of the F_x_xxx fixed-point constants +%define PASS1_BITS 2 ; extra scale bits carried by the pass-1 results + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) ; psrad count, 4x4 pass 1 +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) ; psrad count, 4x4 pass 2 +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) ; psrad count, 2x2 pass 1 +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) ; psrad count, 2x2 pass 2 + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) ; rounding right shift by n; the literals below are the constants scaled by 2^30 +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_mmx) + +EXTN(jconst_idct_red_mmx): ; constant table referenced by both the 4x4 and the 2x2 routine + +PW_F184_MF076 times 2 dw F_1_847,-F_0_765 ; PW_*: word pairs used as pmaddwd operands +PW_F256_F089 times 2 dw F_2_562, F_0_899 +PW_F106_MF217 times 2 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 2 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 2 dw F_1_451,-F_0_211 +PW_F362_MF127 times 2 dw F_3_624,-F_1_272 +PW_F085_MF072 times 2 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 2 dd 1 << (DESCALE_P1_4-1) ; PD_DESCALE_*: rounding bias added before the matching psrad +PD_DESCALE_P2_4 times 2 dd 1 << (DESCALE_P2_4-1) +PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2-1) +PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2-1) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE ; added with paddb to the packed signed output bytes + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block.
+; +; GLOBAL(void) +; jpeg_idct_4x4_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 ; slot where the prologue saves the unaligned frame pointer +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_4x4_mmx) + +EXTN(jpeg_idct_4x4_mmx): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp],eax ; save it at [original_ebp] + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] ; reserve wk[] and workspace[] below the aligned ebp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array.
+ +; mov eax, [original_ebp] + mov edx, POINTER [compptr(eax)] ; eax still holds the original ebp from the prologue + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF * wsptr + mov ecx, DCTSIZE/4 ; ctr (4 columns per iteration) + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT ; quick reject (presumably the first dword of rows 1 and 2) + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] ; row 4 is not tested; this routine never reads it + por mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm0,mm1 + packsswb mm0,mm0 ; squeeze the OR of rows 1-3 and 5-7 into the low dword + movd eax,mm0 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS ; DC-only output: dequantized DC << PASS1_BITS + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm2,mm2 ; mm2=(02 02 03 03) + + movq mm1,mm0 + punpckldq mm0,mm0 ; mm0=(00 00 00 00) + punpckhdq mm1,mm1 ; mm1=(01 01 01 01) + movq mm3,mm2 + punpckldq mm2,mm2 ; mm2=(02 02 02 02) + punpckhdq mm3,mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ; dequantize: coefficient * quantptr entry + pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD
[MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm0 + punpcklwd mm4,mm1 ; interleave in1/in3 words so that pmaddwd sums each weighted pair + punpckhwd mm5,mm1 + movq mm0,mm4 + movq mm1,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6,mm2 + movq mm7,mm2 + punpcklwd mm6,mm3 ; same interleave for in5/in7 + punpckhwd mm7,mm3 + movq mm2,mm6 + movq mm3,mm7 + pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6,mm4 ; mm6=tmp2L + paddd mm7,mm5 ; mm7=tmp2H + paddd mm2,mm0 ; mm2=tmp0L + paddd mm3,mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor mm1,mm1 ; zeroed so that in0 lands in the high word of each dword + pxor mm2,mm2 + punpcklwd mm1,mm4 ; mm1=tmp0L + punpckhwd mm2,mm4 ; mm2=tmp0H + psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3,mm5 ; mm5=in2=z2 + punpcklwd mm5,mm0 ; mm0=in6=z3 + punpckhwd mm3,mm0 + pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4,mm1 + movq mm0,mm2 + paddd mm1,mm5 ; mm1=tmp10L + paddd mm2,mm3 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp12L + psubd mm0,mm3 ; mm0=tmp12H + + ; -- Final output stage + + movq mm5,mm1 + movq mm3,mm2 + paddd mm1,mm6 ; mm1=data0L + paddd mm2,mm7 ; mm2=data0H + psubd mm5,mm6 ; mm5=data3L + psubd mm3,mm7 ;
mm3=data3H + + movq mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm6=[PD_DESCALE_P1_4] + + paddd mm1,mm6 ; add the rounding bias, then shift right by DESCALE_P1_4 + paddd mm2,mm6 + psrad mm1,DESCALE_P1_4 + psrad mm2,DESCALE_P1_4 + paddd mm5,mm6 + paddd mm3,mm6 + psrad mm5,DESCALE_P1_4 + psrad mm3,DESCALE_P1_4 + + packssdw mm1,mm2 ; mm1=data0=(00 01 02 03) + packssdw mm5,mm3 ; mm5=data3=(30 31 32 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2,mm4 + movq mm3,mm0 + paddd mm4,mm7 ; mm4=data1L + paddd mm0,mm6 ; mm0=data1H + psubd mm2,mm7 ; mm2=data2L + psubd mm3,mm6 ; mm3=data2H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm7=[PD_DESCALE_P1_4] + + paddd mm4,mm7 + paddd mm0,mm7 + psrad mm4,DESCALE_P1_4 + psrad mm0,DESCALE_P1_4 + paddd mm2,mm7 + paddd mm3,mm7 + psrad mm2,DESCALE_P1_4 + psrad mm3,DESCALE_P1_4 + + packssdw mm4,mm0 ; mm4=data1=(10 11 12 13) + packssdw mm2,mm3 ; mm2=data2=(20 21 22 23) + + movq mm6,mm1 ; transpose coefficients(phase 1) + punpcklwd mm1,mm4 ; mm1=(00 10 01 11) + punpckhwd mm6,mm4 ; mm6=(02 12 03 13) + movq mm7,mm2 ; transpose coefficients(phase 1) + punpcklwd mm2,mm5 ; mm2=(20 30 21 31) + punpckhwd mm7,mm5 ; mm7=(22 32 23 33) + + movq mm0,mm1 ; transpose coefficients(phase 2) + punpckldq mm1,mm2 ; mm1=(00 10 20 30) + punpckhdq mm0,mm2 ; mm0=(01 11 21 31) + movq mm3,mm6 ; transpose coefficients(phase 2) + punpckldq mm6,mm7 ; mm6=(02 12 22 32) + punpckhdq mm3,mm7 ; mm3=(03 13 23 33) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr (4 workspace rows, one per input column just processed) + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array.
+ + mov eax, [original_ebp] ; reload the original ebp to reach the stack arguments + lea esi, [workspace] ; JCOEF * wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] ; eax = output_col from here on + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] ; pass 2 uses the workspace values as-is (no pmullw) + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4,mm0 + movq mm5,mm0 + punpcklwd mm4,mm1 + punpckhwd mm5,mm1 + movq mm0,mm4 + movq mm1,mm5 + pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6,mm2 + movq mm7,mm2 + punpcklwd mm6,mm3 + punpckhwd mm7,mm3 + movq mm2,mm6 + movq mm3,mm7 + pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6,mm4 ; mm6=tmp2L + paddd mm7,mm5 ; mm7=tmp2H + paddd mm2,mm0 ; mm2=tmp0L + paddd mm3,mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + pxor mm1,mm1 + pxor mm2,mm2 + punpcklwd mm1,mm4 ; mm1=tmp0L + punpckhwd mm2,mm4 ; mm2=tmp0H + psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3,mm5 ; mm5=in2=z2 + punpcklwd mm5,mm0 ; mm0=in6=z3 + punpckhwd mm3,mm0 + pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4,mm1 + movq mm0,mm2 + paddd mm1,mm5 ; mm1=tmp10L + paddd mm2,mm3 ; mm2=tmp10H + psubd mm4,mm5 ; mm4=tmp12L + psubd mm0,mm3 ; mm0=tmp12H + + ; -- Final
output stage + + movq mm5,mm1 + movq mm3,mm2 + paddd mm1,mm6 ; mm1=data0L + paddd mm2,mm7 ; mm2=data0H + psubd mm5,mm6 ; mm5=data3L + psubd mm3,mm7 ; mm3=data3H + + movq mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm6=[PD_DESCALE_P2_4] + + paddd mm1,mm6 + paddd mm2,mm6 + psrad mm1,DESCALE_P2_4 + psrad mm2,DESCALE_P2_4 + paddd mm5,mm6 + paddd mm3,mm6 + psrad mm5,DESCALE_P2_4 + psrad mm3,DESCALE_P2_4 + + packssdw mm1,mm2 ; mm1=data0=(00 10 20 30) + packssdw mm5,mm3 ; mm5=data3=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2,mm4 + movq mm3,mm0 + paddd mm4,mm7 ; mm4=data1L + paddd mm0,mm6 ; mm0=data1H + psubd mm2,mm7 ; mm2=data2L + psubd mm3,mm6 ; mm3=data2H + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm7=[PD_DESCALE_P2_4] + + paddd mm4,mm7 + paddd mm0,mm7 + psrad mm4,DESCALE_P2_4 + psrad mm0,DESCALE_P2_4 + paddd mm2,mm7 + paddd mm3,mm7 + psrad mm2,DESCALE_P2_4 + psrad mm3,DESCALE_P2_4 + + packssdw mm4,mm0 ; mm4=data1=(01 11 21 31) + packssdw mm2,mm3 ; mm2=data2=(02 12 22 32) + + movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] + + packsswb mm1,mm2 ; mm1=(00 10 20 30 02 12 22 32) + packsswb mm4,mm5 ; mm4=(01 11 21 31 03 13 23 33) + paddb mm1,mm6 ; offset the saturated signed bytes by CENTERJSAMPLE + paddb mm4,mm6 + + movq mm7,mm1 ; transpose coefficients(phase 1) + punpcklbw mm1,mm4 ; mm1=(00 01 10 11 20 21 30 31) + punpckhbw mm7,mm4 ; mm7=(02 03 12 13 22 23 32 33) + + movq mm0,mm1 ; transpose coefficients(phase 2) + punpcklwd mm1,mm7 ; mm1=(00 01 02 03 10 11 12 13) + punpckhwd mm0,mm7 ; mm0=(20 21 22 23 30 31 32 33) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] ; esi is reused as a row pointer (wsptr is done) + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 ; output row 0 <- (00 01 02 03) + movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 ; output row 2 <- (20 21 22 23) + + psrlq mm1,4*BYTE_BIT ; mm1=(10 11 12 13 -- -- -- --) + psrlq mm0,4*BYTE_BIT ; mm0=(30 31 32 33 -- -- -- --) + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 ; output row 1 <- (10 11 12 13) + movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 ; output row 3 <- (30 31 32 33) + + emms ; empty MMX state + + pop edi + pop esi
+; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jpeg_idct_2x2_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + + align 16 + global EXTN(jpeg_idct_2x2_mmx) + +EXTN(jpeg_idct_2x2_mmx): + push ebp + mov ebp,esp ; plain frame: this routine uses no wk[] or workspace[] + push ebx ; ebx holds the GOT address and is reused as scratch for the final store +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input.
+ + mov edx, POINTER [compptr(ebp)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm0=(10 11 ** 13), mm1=(30 31 ** 33) + ; mm2=(50 51 ** 53), mm3=(70 71 ** 73) + + pcmpeqd mm7,mm7 ; all ones, shifted below into a mask for the odd words + pslld mm7,WORD_BIT ; mm7={0x0000 0xFFFF 0x0000 0xFFFF} + + movq mm4,mm0 ; mm4=(10 11 ** 13) + movq mm5,mm2 ; mm5=(50 51 ** 53) + punpcklwd mm4,mm1 ; mm4=(10 30 11 31) + punpcklwd mm5,mm3 ; mm5=(50 70 51 71) + pmaddwd mm4,[GOTOFF(ebx,PW_F362_MF127)] ; in1*F_3_624 - in3*F_1_272 for col0 and col1 + pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] ; in5*F_0_850 - in7*F_0_720 for col0 and col1 + + psrld mm0,WORD_BIT ; mm0=(11 -- 13 --) + pand mm1,mm7 ; mm1=(-- 31 -- 33) + psrld mm2,WORD_BIT ; mm2=(51 -- 53 --) + pand mm3,mm7 ; mm3=(-- 71 -- 73) + por mm0,mm1 ; mm0=(11 31 13 33) + por mm2,mm3 ; mm2=(51 71 53 73) + pmaddwd mm0,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm2,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm4,mm5 ; mm4=tmp0[col0 col1] (only col0 is consumed; col1 is taken from mm0) + + movq mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)] + pmullw mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD
[MMBLOCK(7,1,esi,SIZEOF_JCOEF)] + pmullw mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm6=(** 15 ** 17), mm1=(** 35 ** 37) + ; mm3=(** 55 ** 57), mm5=(** 75 ** 77) + + psrld mm6,WORD_BIT ; mm6=(15 -- 17 --) + pand mm1,mm7 ; mm1=(-- 35 -- 37) + psrld mm3,WORD_BIT ; mm3=(55 -- 57 --) + pand mm5,mm7 ; mm5=(-- 75 -- 77) + por mm6,mm1 ; mm6=(15 35 17 37) + por mm3,mm5 ; mm3=(55 75 57 77) + pmaddwd mm6,[GOTOFF(ebx,PW_F362_MF127)] + pmaddwd mm3,[GOTOFF(ebx,PW_F085_MF072)] + + paddd mm0,mm2 ; mm0=tmp0[col1 col3] + paddd mm6,mm3 ; mm6=tmp0[col5 col7] + + ; -- Even part + + movq mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)] + pmullw mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm1=(00 01 ** 03), mm5=(** 05 ** 07) + + movq mm2,mm1 ; mm2=(00 01 ** 03) + pslld mm1,WORD_BIT ; mm1=(-- 00 -- **) + psrad mm1,(WORD_BIT-CONST_BITS-2) ; mm1=tmp10[col0 ****] + + pand mm2,mm7 ; mm2=(-- 01 -- 03) + pand mm5,mm7 ; mm5=(-- 05 -- 07) + psrad mm2,(WORD_BIT-CONST_BITS-2) ; mm2=tmp10[col1 col3] + psrad mm5,(WORD_BIT-CONST_BITS-2) ; mm5=tmp10[col5 col7] + + ; -- Final output stage + + movq mm3,mm1 + paddd mm1,mm4 ; mm1=data0[col0 ****]=(A0 **) + psubd mm3,mm4 ; mm3=data1[col0 ****]=(B0 **) + punpckldq mm1,mm3 ; mm1=(A0 B0) + + movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; mm7=[PD_DESCALE_P1_2] (the odd-word mask is no longer needed) + + movq mm4,mm2 + movq mm3,mm5 + paddd mm2,mm0 ; mm2=data0[col1 col3]=(A1 A3) + paddd mm5,mm6 ; mm5=data0[col5 col7]=(A5 A7) + psubd mm4,mm0 ; mm4=data1[col1 col3]=(B1 B3) + psubd mm3,mm6 ; mm3=data1[col5 col7]=(B5 B7) + + paddd mm1,mm7 + psrad mm1,DESCALE_P1_2 ; mm1=(A0 B0), rounded and descaled + + paddd mm2,mm7 + paddd mm5,mm7 + psrad mm2,DESCALE_P1_2 + psrad mm5,DESCALE_P1_2 + paddd mm4,mm7 + paddd mm3,mm7 + psrad mm4,DESCALE_P1_2 + psrad mm3,DESCALE_P1_2 + + ; ---- Pass 2: process rows, store into output array.
+ + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(ebp)] + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw mm2,mm4 ; mm2=(A1 A3 B1 B3) + packssdw mm5,mm3 ; mm5=(A5 A7 B5 B7) + pmaddwd mm2,[GOTOFF(ebx,PW_F362_MF127)] ; (A1*F_3_624 - A3*F_1_272, B1*F_3_624 - B3*F_1_272) + pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] ; (A5*F_0_850 - A7*F_0_720, B5*F_0_850 - B7*F_0_720) + + paddd mm2,mm5 ; mm2=tmp0[row0 row1] + + ; -- Even part + + pslld mm1,(CONST_BITS+2) ; mm1=tmp10[row0 row1] + + ; -- Final output stage + + movq mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)] ; mm0=[PD_DESCALE_P2_2] + + movq mm6,mm1 + paddd mm1,mm2 ; mm1=data0[row0 row1]=(C0 C1) + psubd mm6,mm2 ; mm6=data1[row0 row1]=(D0 D1) + + paddd mm1,mm0 + paddd mm6,mm0 + psrad mm1,DESCALE_P2_2 + psrad mm6,DESCALE_P2_2 + + movq mm7,mm1 ; transpose coefficients + punpckldq mm1,mm6 ; mm1=(C0 D0) + punpckhdq mm7,mm6 ; mm7=(C1 D1) + + packssdw mm1,mm7 ; mm1=(C0 D0 C1 D1) + packsswb mm1,mm1 ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1) + paddb mm1,[GOTOFF(ebx,PB_CENTERJSAMP)] ; last use of the GOT address in ebx + + movd ecx,mm1 + movd ebx,mm1 ; ebx=(C0 D0 C1 D1) + shr ecx,2*BYTE_BIT ; ecx=(C1 D1 -- --) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov WORD [edx+eax*SIZEOF_JSAMPLE], bx ; output row 0 <- (C0 D0) + mov WORD [esi+eax*SIZEOF_JSAMPLE], cx ; output row 1 <- (C1 D1) + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +%endif ; JIDCT_INT_MMX_SUPPORTED +%endif ; IDCT_SCALING_SUPPORTED diff --git a/jiss2flt.asm b/jiss2flt.asm new file mode 100644 index 0000000..c0565a3 --- /dev/null +++ b/jiss2flt.asm @@ -0,0 +1,508 @@ +; +; jiss2flt.asm - floating-point IDCT (SSE & SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler); it +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see jidctflt.c for more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_FLOAT_SUPPORTED +%ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 ; keeps the low two floats of each operand +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE ; keeps the high two floats of each operand +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse2) + +EXTN(jconst_idct_float_sse2): + +PD_1_414 times 4 dd 1.414213562373095048801689 ; each PD_* value is replicated across the 4 float lanes +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients.
+; +; GLOBAL(void) +; jpeg_idct_float_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_float_sse2) + +EXTN(jpeg_idct_float_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ +; mov eax, [original_ebp] + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT * wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq xmm1, _MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm2, _MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm3, _MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm4, _MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm5, _MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm6, _MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + movq xmm7, _MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm2 + por xmm3,xmm4 + por xmm5,xmm6 + por xmm1,xmm3 + por xmm5,xmm7 + por xmm1,xmm5 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq xmm0, _MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD 
[XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq xmm0, _MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq xmm1, _MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm2, _MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm3, _MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpcklwd xmm1,xmm1 ; xmm1=(20 20 21 21 22 22 23 23) + psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) + cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm1,xmm1 ; xmm1=in2=(20 21 22 23) + + punpcklwd xmm2,xmm2 ; xmm2=(40 40 41 41 42 42 43 43) + punpcklwd xmm3,xmm3 ; xmm3=(60 60 61 61 62 62 63 63) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) + cvtdq2ps xmm2,xmm2 ; xmm2=in4=(40 41 42 43) + cvtdq2ps xmm3,xmm3 ; xmm3=in6=(60 61 62 63) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq xmm2, _MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm3, _MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm5, _MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm1, _MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm2,xmm2 ; xmm2=(10 10 11 11 12 12 13 13) + punpcklwd 
xmm3,xmm3 ; xmm3=(30 30 31 31 32 32 33 33) + psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) + psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) + cvtdq2ps xmm2,xmm2 ; xmm2=in1=(10 11 12 13) + cvtdq2ps xmm3,xmm3 ; xmm3=in3=(30 31 32 33) + + punpcklwd xmm5,xmm5 ; xmm5=(50 50 51 51 52 52 53 53) + punpcklwd xmm1,xmm1 ; xmm1=(70 70 71 71 72 72 73 73) + psrad xmm5,(DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) + psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) + cvtdq2ps xmm5,xmm5 ; xmm5=in5=(50 51 52 53) + cvtdq2ps xmm1,xmm1 ; xmm1=in7=(70 71 72 73) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD 
[wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, 
4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT * wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps 
xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm1=[PD_RNDINT_MAGIC] + pcmpeqd xmm3,xmm3 + psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm6,xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) + addps xmm7,xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) + addps xmm0,xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) + addps xmm5,xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) + + pand xmm6,xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) + pand xmm0,xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) + por xmm6,xmm7 ; xmm6=(00 01 10 11 20 21 30 31) + por xmm0,xmm5 ; xmm0=(06 07 16 17 26 27 36 37) + + movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 + movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm7,xmm1 + movaps xmm5,xmm3 + addps xmm1,xmm2 ; xmm1=data2=(02 12 22 32) + addps xmm3,xmm4 ; xmm3=data4=(04 14 24 34) + subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) + subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) + + movaps xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm2=[PD_RNDINT_MAGIC] + pcmpeqd xmm4,xmm4 + psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm3,xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) + addps xmm7,xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) + addps xmm1,xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) + addps xmm5,xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) 
+ + pand xmm3,xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) + pslld xmm7,WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) + pand xmm1,xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) + pslld xmm5,WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) + por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35) + por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) + + movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] + + packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) + packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) + paddb xmm6,xmm2 + paddb xmm1,xmm2 + + movdqa xmm4,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + + movdqa xmm7,xmm6 ; transpose coefficients(phase 3) + punpckldq xmm6,xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm7,xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + + pshufd xmm5,xmm6,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm3,xmm7,0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq _MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7 + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 + movq _MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JIDCT_FLT_SSE_SSE2_SUPPORTED +%endif ; DCT_FLOAT_SUPPORTED diff --git a/jiss2fst.asm 
b/jiss2fst.asm new file mode 100644 index 0000000..937a260 --- /dev/null +++ b/jiss2fst.asm @@ -0,0 +1,512 @@ +; +; jiss2fst.asm - fast integer IDCT (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_IFAST_SUPPORTED +%ifdef JIDCT_INT_SSE2_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 16 + global EXTN(jconst_idct_ifast_sse2) + +EXTN(jconst_idct_ifast_sse2): + +PW_F1414 times 8 dw F_1_414 << CONST_SHIFT +PW_F1847 times 8 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 8 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jpeg_idct_ifast_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; +; Stack frame: the prologue copies esp (taken right after "push ebp") into +; eax and also stores it at [original_ebp]; ebp itself is then rounded down +; to a 128-bit boundary so the wk() scratch slots below it can be accessed +; with aligned moves (movdqa). The argument macros below are offsets from +; that saved pointer (+8 is the first argument), so arguments are read as +; e.g. [coef_block(eax)], never relative to the aligned ebp. +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jpeg_idct_ifast_sse2) + +EXTN(jpeg_idct_ifast_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 ; leave room so the aligned slot cannot overlap the pushed ebp + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax ; [original_ebp] = unaligned frame pointer + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] ; reserve WK_NUM xmmword scratch slots below ebp + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input.
+ +; mov eax, [original_ebp] + ; (reload not needed: eax is expected to still hold the frame pointer from the prologue) + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 + ; Quick reject: if the first dword of row 1 or row 2 is nonzero, + ; go straight to the full column DCT without scanning the block. + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + ; Full test: OR rows 1..7 together, then let two packsswb steps fold + ; the eight words into the low dword. Signed saturation never turns + ; a nonzero word into zero, so eax == 0 iff every AC term is zero. + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + ; Only row 0 needs dequantizing; each of its 8 values is then + ; broadcast across a whole register, which yields col0..col7 in + ; the same register/wk() layout that .column_end expects. + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm7,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm7,xmm7 ; xmm7=(04 04 05 05 06 06 07 07) + + pshufd xmm6,xmm0,0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) + pshufd xmm2,xmm0,0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) + pshufd xmm5,xmm0,0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) + pshufd xmm0,xmm0,0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) + pshufd xmm1,xmm7,0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) + pshufd xmm4,xmm7,0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) + pshufd xmm3,xmm7,0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) + pshufd xmm7,xmm7,0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1,
XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 + psubw xmm0,xmm2 ; xmm0=tmp11 + psubw xmm1,xmm3 + paddw xmm4,xmm2 ; xmm4=tmp10 + paddw xmm5,xmm3 ; xmm5=tmp13 + + psllw xmm1,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm1,[GOTOFF(ebx,PW_F1414)] + psubw xmm1,xmm5 ; xmm1=tmp12 + + movdqa xmm6,xmm4 + movdqa xmm7,xmm0 + psubw xmm4,xmm5 ; xmm4=tmp3 + psubw xmm0,xmm1 ; xmm0=tmp2 + paddw xmm6,xmm5 ; xmm6=tmp0 + paddw xmm7,xmm1 ; xmm7=tmp1 + + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 + + ; -- Odd part + + movdqa xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4,xmm2 + movdqa xmm0,xmm5 + psubw xmm2,xmm1 ; xmm2=z12 + psubw xmm5,xmm3 ; xmm5=z10 + paddw xmm4,xmm1 ; xmm4=z11 + paddw xmm0,xmm3 ; xmm0=z13 + + movdqa xmm1,xmm5 ; xmm1=z10(unscaled) + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm3,xmm4 + psubw xmm4,xmm0 + paddw xmm3,xmm0 ; xmm3=tmp7 + + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm4,[GOTOFF(ebx,PW_F1414)] ; xmm4=tmp11 + + ; To avoid overflow... 
+ ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm0,xmm5 + paddw xmm5,xmm2 + pmulhw xmm5,[GOTOFF(ebx,PW_F1847)] ; xmm5=z5 + pmulhw xmm0,[GOTOFF(ebx,PW_MF1613)] + pmulhw xmm2,[GOTOFF(ebx,PW_F1082)] + psubw xmm0,xmm1 + psubw xmm2,xmm5 ; xmm2=tmp10 + paddw xmm0,xmm5 ; xmm0=tmp12 + + ; -- Final output stage + + psubw xmm0,xmm3 ; xmm0=tmp6 + movdqa xmm1,xmm6 + movdqa xmm5,xmm7 + paddw xmm6,xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) + paddw xmm7,xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) + psubw xmm1,xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) + psubw xmm5,xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) + psubw xmm4,xmm0 ; xmm4=tmp5 + + movdqa xmm3,xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6,xmm7 ; xmm6=(00 10 01 11 02 12 03 13) + punpckhwd xmm3,xmm7 ; xmm3=(04 14 05 15 06 16 07 17) + movdqa xmm0,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm1 ; xmm5=(60 70 61 71 62 72 63 73) + punpckhwd xmm0,xmm1 ; xmm0=(64 74 65 75 66 76 67 77) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) + + paddw xmm2,xmm4 ; xmm2=tmp4 + movdqa xmm5,xmm7 + movdqa xmm0,xmm1 + paddw xmm7,xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) + paddw xmm1,xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) + psubw xmm5,xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) + psubw xmm0,xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm0 ; xmm7=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm0 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm2,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm5 ; xmm1=(40 50 41 51 42 52 43 53) + punpckhwd xmm2,xmm5 ; xmm2=(44 54 45 55 46 56 47 57) + + movdqa xmm0,xmm3 ; transpose coefficients(phase 2) + 
punpckldq xmm3,xmm4 ; xmm3=(04 14 24 34 05 15 25 35) + punpckhdq xmm0,xmm4 ; xmm0=(06 16 26 36 07 17 27 37) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=(00 10 20 30 01 11 21 31) + punpckhdq xmm5,xmm7 ; xmm5=(02 12 22 32 03 13 23 33) + + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm4 ; xmm1=(40 50 60 70 41 51 61 71) + punpckhdq xmm3,xmm4 ; xmm3=(42 52 62 72 43 53 63 73) + movdqa xmm0,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm7 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm0,xmm7 ; xmm0=(46 56 66 76 47 57 67 77) + + movdqa xmm4,xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6,xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm4,xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) + movdqa xmm7,xmm5 ; transpose coefficients(phase 3) + punpcklqdq xmm5,xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm7,xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 + + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm4,xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) + movdqa xmm7,xmm3 ; transpose coefficients(phase 3) + punpcklqdq xmm3,xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm7,xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] 
+ prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 + + movdqa xmm2,xmm6 + movdqa xmm0,xmm5 + psubw xmm6,xmm1 ; xmm6=tmp11 + psubw xmm5,xmm3 + paddw xmm2,xmm1 ; xmm2=tmp10 + paddw xmm0,xmm3 ; xmm0=tmp13 + + psllw xmm5,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5,[GOTOFF(ebx,PW_F1414)] + psubw xmm5,xmm0 ; xmm5=tmp12 + + movdqa xmm1,xmm2 + movdqa xmm3,xmm6 + psubw xmm2,xmm0 ; xmm2=tmp3 + psubw xmm6,xmm5 ; xmm6=tmp2 + paddw xmm1,xmm0 ; xmm1=tmp0 + paddw xmm3,xmm5 ; xmm3=tmp1 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 + + ; -- Odd part + + ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 + + movdqa xmm2,xmm0 + movdqa xmm6,xmm4 + psubw xmm0,xmm7 ; xmm0=z12 + psubw xmm4,xmm5 ; xmm4=z10 + paddw xmm2,xmm7 ; xmm2=z11 + paddw xmm6,xmm5 ; xmm6=z13 + + movdqa xmm7,xmm4 ; xmm7=z10(unscaled) + psllw xmm0,PRE_MULTIPLY_SCALE_BITS + psllw xmm4,PRE_MULTIPLY_SCALE_BITS + + movdqa xmm5,xmm2 + psubw xmm2,xmm6 + paddw xmm5,xmm6 ; xmm5=tmp7 + + psllw xmm2,PRE_MULTIPLY_SCALE_BITS + pmulhw xmm2,[GOTOFF(ebx,PW_F1414)] ; xmm2=tmp11 + + ; To avoid overflow... 
+ ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm6,xmm4 + paddw xmm4,xmm0 + pmulhw xmm4,[GOTOFF(ebx,PW_F1847)] ; xmm4=z5 + pmulhw xmm6,[GOTOFF(ebx,PW_MF1613)] + pmulhw xmm0,[GOTOFF(ebx,PW_F1082)] + psubw xmm6,xmm7 + psubw xmm0,xmm4 ; xmm0=tmp10 + paddw xmm6,xmm4 ; xmm6=tmp12 + + ; -- Final output stage + + psubw xmm6,xmm5 ; xmm6=tmp6 + movdqa xmm7,xmm1 + movdqa xmm4,xmm3 + paddw xmm1,xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) + paddw xmm3,xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) + psraw xmm1,(PASS1_BITS+3) ; descale + psraw xmm3,(PASS1_BITS+3) ; descale + psubw xmm7,xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) + psubw xmm4,xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) + psraw xmm7,(PASS1_BITS+3) ; descale + psraw xmm4,(PASS1_BITS+3) ; descale + psubw xmm2,xmm6 ; xmm2=tmp5 + + packsswb xmm1,xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 + + paddw xmm0,xmm2 ; xmm0=tmp4 + movdqa xmm4,xmm5 + movdqa xmm7,xmm6 + paddw xmm5,xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) + paddw xmm6,xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) + psraw xmm5,(PASS1_BITS+3) ; descale + psraw xmm6,(PASS1_BITS+3) ; descale + psubw xmm4,xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) + psubw xmm7,xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) + psraw xmm4,(PASS1_BITS+3) ; descale + psraw xmm7,(PASS1_BITS+3) ; descale + + movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] + + packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm1,xmm2 + paddb xmm3,xmm2 + paddb xmm5,xmm2 + paddb xmm7,xmm2 + + movdqa xmm0,xmm1 ; transpose coefficients(phase 1) + punpcklbw xmm1,xmm3 ; xmm1=(00 
01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklbw xmm5,xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm6,xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm1 ; transpose coefficients(phase 2) + punpcklwd xmm1,xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm2,xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6,xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm2,xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm3,xmm1 ; transpose coefficients(phase 3) + punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm3,xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm7,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm7,xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm5,xmm1,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm3,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm6,xmm4,0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm2,xmm7,0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq _MMWORD 
[edx+eax*SIZEOF_JSAMPLE], xmm5 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JIDCT_INT_SSE2_SUPPORTED +%endif ; DCT_IFAST_SUPPORTED diff --git a/jiss2int.asm b/jiss2int.asm new file mode 100644 index 0000000..b0e7109 --- /dev/null +++ b/jiss2int.asm @@ -0,0 +1,869 @@ +; +; jiss2int.asm - accurate integer IDCT (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_ISLOW_SUPPORTED +%ifdef JIDCT_INT_SSE2_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." 
+%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS-PASS1_BITS) +%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_islow_sse2) + +EXTN(jconst_idct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) +PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 +PW_F117_F078 times 4 dw 
F_1_175, (F_1_175-F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jpeg_idct_islow_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 12 + + align 16 + global EXTN(jpeg_idct_islow_sse2) + +EXTN(jpeg_idct_islow_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. 
+ +; mov eax, [original_ebp] + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1,xmm0 + packsswb xmm1,xmm1 + packsswb xmm1,xmm1 + movd eax,xmm1 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm5,PASS1_BITS + + movdqa xmm4,xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm5,xmm5 ; xmm5=(00 00 01 01 02 02 03 03) + punpckhwd xmm4,xmm4 ; xmm4=(04 04 05 05 06 06 07 07) + + pshufd xmm7,xmm5,0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) + pshufd xmm6,xmm5,0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) + pshufd xmm1,xmm5,0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) + pshufd xmm5,xmm5,0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) + pshufd xmm0,xmm4,0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) + pshufd xmm3,xmm4,0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) + pshufd xmm2,xmm4,0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) + pshufd xmm4,xmm4,0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, 
XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm4,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm4,xmm3 ; xmm3=in6=z3 + punpckhwd xmm5,xmm3 + movdqa xmm1,xmm4 + movdqa xmm3,xmm5 + pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=tmp3L + pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm3,[GOTOFF(ebx,PW_F054_MF130)] ; xmm3=tmp2H + + movdqa xmm6,xmm0 + paddw xmm0,xmm2 ; xmm0=in0+in4 + psubw xmm6,xmm2 ; xmm6=in0-in4 + + pxor xmm7,xmm7 + pxor xmm2,xmm2 + punpcklwd xmm7,xmm0 ; xmm7=tmp0L + punpckhwd xmm2,xmm0 ; xmm2=tmp0H + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + psrad xmm2,(16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS + + movdqa xmm0,xmm7 + paddd xmm7,xmm4 ; xmm7=tmp10L + psubd xmm0,xmm4 ; xmm0=tmp13L + movdqa xmm4,xmm2 + paddd xmm2,xmm5 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp13H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm7,xmm7 + punpcklwd xmm5,xmm6 ; xmm5=tmp1L + punpckhwd xmm7,xmm6 ; xmm7=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + + 
movdqa xmm2,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm2,xmm1 ; xmm2=tmp12L + movdqa xmm0,xmm7 + paddd xmm7,xmm3 ; xmm7=tmp11H + psubd xmm0,xmm3 ; xmm0=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm5,xmm6 + movdqa xmm7,xmm4 + paddw xmm5,xmm3 ; xmm5=z3 + paddw xmm7,xmm1 ; xmm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm2,xmm5 + movdqa xmm0,xmm5 + punpcklwd xmm2,xmm7 + punpckhwd xmm0,xmm7 + movdqa xmm5,xmm2 + movdqa xmm7,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF078_F117)] ; xmm2=z3L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3H + pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm7,[GOTOFF(ebx,PW_F117_F078)] ; xmm7=z4H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; 
tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm2,xmm3 + movdqa xmm0,xmm3 + punpcklwd xmm2,xmm4 + punpckhwd xmm0,xmm4 + movdqa xmm3,xmm2 + movdqa xmm4,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm2=tmp0L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0H + pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3L + pmaddwd xmm4,[GOTOFF(ebx,PW_MF089_F060)] ; xmm4=tmp3H + + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L + paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H + paddd xmm3,xmm5 ; xmm3=tmp3L + paddd xmm4,xmm7 ; xmm4=tmp3H + + movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H + + movdqa xmm2,xmm1 + movdqa xmm0,xmm1 + punpcklwd xmm2,xmm6 + punpckhwd xmm0,xmm6 + movdqa xmm1,xmm2 + movdqa xmm6,xmm0 + pmaddwd xmm2,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm2=tmp1L + pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1H + pmaddwd xmm1,[GOTOFF(ebx,PW_MF256_F050)] ; xmm1=tmp2L + pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm2,xmm5 ; xmm2=tmp1L + paddd xmm0,xmm7 ; xmm0=tmp1H + paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H + + movdqa xmm2,xmm5 + movdqa xmm0,xmm7 + paddd xmm5,xmm3 ; xmm5=data0L + paddd xmm7,xmm4 ; xmm7=data0H + psubd xmm2,xmm3 ; xmm2=data7L + psubd xmm0,xmm4 ; xmm0=data7H + + movdqa xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm3=[PD_DESCALE_P1] + + paddd xmm5,xmm3 + paddd xmm7,xmm3 + psrad xmm5,DESCALE_P1 + psrad xmm7,DESCALE_P1 + paddd xmm2,xmm3 + paddd xmm0,xmm3 + 
psrad xmm2,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm5,xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) + packssdw xmm2,xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) + + movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L + movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H + + movdqa xmm7,xmm4 + movdqa xmm0,xmm3 + paddd xmm4,xmm1 ; xmm4=data1L + paddd xmm3,xmm6 ; xmm3=data1H + psubd xmm7,xmm1 ; xmm7=data6L + psubd xmm0,xmm6 ; xmm0=data6H + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm1=[PD_DESCALE_P1] + + paddd xmm4,xmm1 + paddd xmm3,xmm1 + psrad xmm4,DESCALE_P1 + psrad xmm3,DESCALE_P1 + paddd xmm7,xmm1 + paddd xmm0,xmm1 + psrad xmm7,DESCALE_P1 + psrad xmm0,DESCALE_P1 + + packssdw xmm4,xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm7,xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) + + movdqa xmm6,xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5,xmm4 ; xmm5=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm1,xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7,xmm2 ; xmm7=(60 70 61 71 62 72 63 73) + punpckhwd xmm1,xmm2 ; xmm1=(64 74 65 75 66 76 67 77) + + movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L + movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H + movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L + movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) + + movdqa xmm5,xmm3 + movdqa xmm6,xmm0 + paddd xmm3,xmm4 ; xmm3=data2L + paddd xmm0,xmm2 ; xmm0=data2H + psubd xmm5,xmm4 ; xmm5=data5L + psubd xmm6,xmm2 ; xmm6=data5H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm7=[PD_DESCALE_P1] + + paddd xmm3,xmm7 + paddd xmm0,xmm7 + psrad xmm3,DESCALE_P1 + psrad xmm0,DESCALE_P1 + paddd xmm5,xmm7 + paddd xmm6,xmm7 + psrad xmm5,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm3,xmm0 ; 
xmm3=data2=(20 21 22 23 24 25 26 27) + packssdw xmm5,xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L + movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H + movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L + movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H + + movdqa xmm0,xmm1 + movdqa xmm6,xmm4 + paddd xmm1,xmm2 ; xmm1=data3L + paddd xmm4,xmm7 ; xmm4=data3H + psubd xmm0,xmm2 ; xmm0=data4L + psubd xmm6,xmm7 ; xmm6=data4H + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm2=[PD_DESCALE_P1] + + paddd xmm1,xmm2 + paddd xmm4,xmm2 + psrad xmm1,DESCALE_P1 + psrad xmm4,DESCALE_P1 + paddd xmm0,xmm2 + paddd xmm6,xmm2 + psrad xmm0,DESCALE_P1 + psrad xmm6,DESCALE_P1 + + packssdw xmm1,xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) + packssdw xmm0,xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) + movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) + + movdqa xmm4,xmm3 ; transpose coefficients(phase 1) + punpcklwd xmm3,xmm1 ; xmm3=(20 30 21 31 22 32 23 33) + punpckhwd xmm4,xmm1 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm6,xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0,xmm5 ; xmm0=(40 50 41 51 42 52 43 53) + punpckhwd xmm6,xmm5 ; xmm6=(44 54 45 55 46 56 47 57) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7,xmm3 ; xmm7=(00 10 20 30 01 11 21 31) + punpckhdq xmm1,xmm3 ; xmm1=(02 12 22 32 03 13 23 33) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2,xmm4 ; xmm2=(04 14 24 34 05 15 25 35) + punpckhdq xmm5,xmm4 ; xmm5=(06 16 26 36 07 17 27 37) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) + + movdqa xmm2,xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0,xmm3 ; xmm0=(40 50 60 70 41 51 61 71) + punpckhdq xmm2,xmm3 ; xmm2=(42 52 
62 72 43 53 63 73) + movdqa xmm5,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm4 ; xmm6=(44 54 64 74 45 55 65 75) + punpckhdq xmm5,xmm4 ; xmm5=(46 56 66 76 47 57 67 77) + + movdqa xmm3,xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7,xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm3,xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) + movdqa xmm4,xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1,xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm4,xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 + + movdqa xmm3,xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0,xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm3,xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) + movdqa xmm4,xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2,xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm4,xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm6,xmm1 ; xmm1=in2=z2 + movdqa xmm5,xmm1 + punpcklwd xmm6,xmm2 ; xmm2=in6=z3 + punpckhwd xmm5,xmm2 + movdqa xmm1,xmm6 + movdqa xmm2,xmm5 + pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=tmp3L + pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm2,[GOTOFF(ebx,PW_F054_MF130)] ; xmm2=tmp2H + + movdqa xmm3,xmm7 + paddw xmm7,xmm0 ; xmm7=in0+in4 + psubw xmm3,xmm0 ; xmm3=in0-in4 + + pxor xmm4,xmm4 + pxor xmm0,xmm0 + punpcklwd xmm4,xmm7 ; xmm4=tmp0L + punpckhwd xmm0,xmm7 ; xmm0=tmp0H + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + psrad xmm0,(16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS + + movdqa xmm7,xmm4 + paddd xmm4,xmm6 ; xmm4=tmp10L + psubd xmm7,xmm6 ; xmm7=tmp13L + movdqa xmm6,xmm0 + paddd xmm0,xmm5 ; xmm0=tmp10H + psubd xmm6,xmm5 ; xmm6=tmp13H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H + + pxor xmm5,xmm5 + pxor xmm4,xmm4 + punpcklwd xmm5,xmm3 ; xmm5=tmp1L + punpckhwd xmm4,xmm3 ; xmm4=tmp1H + psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + + movdqa xmm0,xmm5 + paddd xmm5,xmm1 ; xmm5=tmp11L + psubd xmm0,xmm1 ; xmm0=tmp12L + movdqa xmm7,xmm4 + paddd xmm4,xmm2 ; xmm4=tmp11H + psubd xmm7,xmm2 ; xmm7=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa 
XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 + movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 + movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 + movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 + + movdqa xmm5,xmm6 + movdqa xmm4,xmm3 + paddw xmm5,xmm1 ; xmm5=z3 + paddw xmm4,xmm2 ; xmm4=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm0,xmm5 + movdqa xmm7,xmm5 + punpcklwd xmm0,xmm4 + punpckhwd xmm7,xmm4 + movdqa xmm5,xmm0 + movdqa xmm4,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3H + pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm4,[GOTOFF(ebx,PW_F117_F078)] ; xmm4=z4H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm0,xmm1 + movdqa xmm7,xmm1 + punpcklwd xmm0,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm1,xmm0 + movdqa xmm3,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0L + pmaddwd 
xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp0H + pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp3L + pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3H + + paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L + paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H + paddd xmm1,xmm5 ; xmm1=tmp3L + paddd xmm3,xmm4 ; xmm3=tmp3H + + movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H + + movdqa xmm0,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm0,xmm6 + punpckhwd xmm7,xmm6 + movdqa xmm2,xmm0 + movdqa xmm6,xmm7 + pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1L + pmaddwd xmm7,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm7=tmp1H + pmaddwd xmm2,[GOTOFF(ebx,PW_MF256_F050)] ; xmm2=tmp2L + pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm0,xmm5 ; xmm0=tmp1L + paddd xmm7,xmm4 ; xmm7=tmp1H + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H + + movdqa xmm0,xmm5 + movdqa xmm7,xmm4 + paddd xmm5,xmm1 ; xmm5=data0L + paddd xmm4,xmm3 ; xmm4=data0H + psubd xmm0,xmm1 ; xmm0=data7L + psubd xmm7,xmm3 ; xmm7=data7H + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm1=[PD_DESCALE_P2] + + paddd xmm5,xmm1 + paddd xmm4,xmm1 + psrad xmm5,DESCALE_P2 + psrad xmm4,DESCALE_P2 + paddd xmm0,xmm1 + paddd xmm7,xmm1 + psrad xmm0,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm5,xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) + packssdw xmm0,xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L + movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H + + movdqa xmm4,xmm3 + movdqa xmm7,xmm1 + paddd xmm3,xmm2 ; xmm3=data1L + paddd xmm1,xmm6 ; xmm1=data1H + psubd xmm4,xmm2 ; xmm4=data6L + psubd xmm7,xmm6 ; xmm7=data6H + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm2=[PD_DESCALE_P2] + + paddd xmm3,xmm2 + 
paddd xmm1,xmm2 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm4,xmm2 + paddd xmm7,xmm2 + psrad xmm4,DESCALE_P2 + psrad xmm7,DESCALE_P2 + + packssdw xmm3,xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) + packssdw xmm4,xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) + + packsswb xmm5,xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3,xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H + movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L + movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm4,xmm6 + movdqa xmm0,xmm2 + paddd xmm6,xmm1 ; xmm6=data2L + paddd xmm2,xmm7 ; xmm2=data2H + psubd xmm4,xmm1 ; xmm4=data5L + psubd xmm0,xmm7 ; xmm0=data5H + + movdqa xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm5=[PD_DESCALE_P2] + + paddd xmm6,xmm5 + paddd xmm2,xmm5 + psrad xmm6,DESCALE_P2 + psrad xmm2,DESCALE_P2 + paddd xmm4,xmm5 + paddd xmm0,xmm5 + psrad xmm4,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + packssdw xmm6,xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) + packssdw xmm4,xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) + + movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L + movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H + movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L + movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H + + movdqa xmm2,xmm3 + movdqa xmm0,xmm1 + paddd xmm3,xmm7 ; xmm3=data3L + paddd xmm1,xmm5 ; xmm1=data3H + psubd xmm2,xmm7 ; xmm2=data4L + psubd xmm0,xmm5 ; xmm0=data4H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm7=[PD_DESCALE_P2] + + paddd xmm3,xmm7 + paddd xmm1,xmm7 + psrad xmm3,DESCALE_P2 + psrad xmm1,DESCALE_P2 + paddd xmm2,xmm7 + paddd xmm0,xmm7 + psrad xmm2,DESCALE_P2 + psrad xmm0,DESCALE_P2 + + movdqa xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm5=[PB_CENTERJSAMP] + + packssdw xmm3,xmm1 ; 
xmm3=data3=(03 13 23 33 43 53 63 73) + packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + packsswb xmm6,xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm3,xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm7,xmm5 + paddb xmm1,xmm5 + paddb xmm6,xmm5 + paddb xmm3,xmm5 + + movdqa xmm0,xmm7 ; transpose coefficients(phase 1) + punpcklbw xmm7,xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0,xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm2,xmm6 ; transpose coefficients(phase 1) + punpcklbw xmm6,xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm2,xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4,xmm7 ; transpose coefficients(phase 2) + punpcklwd xmm7,xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4,xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm5,xmm2 ; transpose coefficients(phase 2) + punpcklwd xmm2,xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm5,xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm1,xmm7 ; transpose coefficients(phase 3) + punpckldq xmm7,xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm1,xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm3,xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4,xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm3,xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm6,xmm7,0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0,xmm1,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm2,xmm4,0x4E ; xmm2=(50 51 52 53 54 55 56 57 
40 41 42 43 44 45 46 47) + pshufd xmm5,xmm3,0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq _MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2 + movq _MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%endif ; JIDCT_INT_SSE2_SUPPORTED +%endif ; DCT_ISLOW_SUPPORTED diff --git a/jiss2red.asm b/jiss2red.asm new file mode 100644 index 0000000..53af6fe --- /dev/null +++ b/jiss2red.asm @@ -0,0 +1,607 @@ +; +; jiss2red.asm - reduced-size IDCT (SSE2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. 
+; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef IDCT_SCALING_SUPPORTED +%ifdef JIDCT_INT_SSE2_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 ; fraction bits of the F_x_xxx fixed-point multipliers +%define PASS1_BITS 2 ; pass-1 results stay scaled up by 2^PASS1_BITS + +%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) ; right-shift count ending pass 1 of the 4x4 IDCT +%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) ; right-shift count ending pass 2 of the 4x4 IDCT +%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) ; presumably pass 1 of the 2x2 IDCT (routine not visible here) - TODO confirm +%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) ; presumably pass 2 of the 2x2 IDCT (routine not visible here) - TODO confirm + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243), i.e. the value times 2^13 rounded to nearest; same for every row below +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. Each multiplier is therefore written as an integer scaled by 2^30 and shifted right, with rounding, to CONST_BITS fraction bits by DESCALE(). 
+%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) +F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243); DESCALE(x,n) adds 2^(n-1) and shifts right by n, so the 2^30-scaled integer is rounded to CONST_BITS fraction bits +F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_red_sse2) + +EXTN(jconst_idct_red_sse2): ; packed constants; the IDCT code reads them as [GOTOFF(ebx,<label>)] + +PW_F184_MF076 times 4 dw F_1_847,-F_0_765 ; PW_ rows: one 16-bit multiplier pair repeated 4 times (16 bytes), used as the pmaddwd operand +PW_F256_F089 times 4 dw F_2_562, F_0_899 +PW_F106_MF217 times 4 dw F_1_061,-F_2_172 +PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 +PW_F145_MF021 times 4 dw F_1_451,-F_0_211 +PW_F362_MF127 times 4 dw F_3_624,-F_1_272 +PW_F085_MF072 times 4 dw F_0_850,-F_0_720 +PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4-1) ; rounding term added with paddd before psrad by DESCALE_P1_4 +PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4-1) ; rounding term added with paddd before psrad by DESCALE_P2_4 +PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2-1) ; half of 2^DESCALE_P1_2; its use is not visible in this part of the file +PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2-1) ; half of 2^DESCALE_P2_2; its use is not visible in this part of the file +PB_CENTERJSAMP times 16 db CENTERJSAMPLE ; 16 copies of CENTERJSAMPLE, added with paddb to the packed output bytes + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4 output block. 
+; +; GLOBAL(void) +; jpeg_idct_4x4_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 16 + global EXTN(jpeg_idct_4x4_sse2) + +EXTN(jpeg_idct_4x4_sse2): + push ebp + mov eax,esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax + mov ebp,esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm0,xmm1 + packsswb xmm0,xmm0 + packsswb xmm0,xmm0 + movd eax,xmm0 + test eax,eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm0,PASS1_BITS + + movdqa xmm3,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + 
punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm3,xmm3 ; xmm3=(04 04 05 05 06 06 07 07) + + pshufd xmm1,xmm0,0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01) + pshufd xmm0,xmm0,0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) + pshufd xmm6,xmm3,0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) + pshufd xmm3,xmm3,0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07) + + jmp near .column_end + alignx 16,7 +%endif +.columnDCT: + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm4,xmm0 + movdqa xmm5,xmm0 + punpcklwd xmm4,xmm1 + punpckhwd xmm5,xmm1 + movdqa xmm0,xmm4 + movdqa xmm1,xmm5 + pmaddwd xmm4,[GOTOFF(ebx,PW_F256_F089)] ; xmm4=(tmp2L) + pmaddwd xmm5,[GOTOFF(ebx,PW_F256_F089)] ; xmm5=(tmp2H) + pmaddwd xmm0,[GOTOFF(ebx,PW_F106_MF217)] ; xmm0=(tmp0L) + pmaddwd xmm1,[GOTOFF(ebx,PW_F106_MF217)] ; xmm1=(tmp0H) + + movdqa xmm6,xmm2 + movdqa xmm7,xmm2 + punpcklwd xmm6,xmm3 + punpckhwd xmm7,xmm3 + movdqa xmm2,xmm6 + movdqa xmm3,xmm7 + pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2L) + pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm7=(tmp2H) + pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0L) + pmaddwd xmm3,[GOTOFF(ebx,PW_F145_MF021)] ; xmm3=(tmp0H) + + paddd xmm6,xmm4 ; xmm6=tmp2L + paddd xmm7,xmm5 ; xmm7=tmp2H + paddd xmm2,xmm0 ; xmm2=tmp0L + paddd xmm3,xmm1 ; xmm3=tmp0H + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H + + ; -- Even part + + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm5, XMMWORD 
[XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movdqa xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor xmm1,xmm1 + pxor xmm2,xmm2 + punpcklwd xmm1,xmm4 ; xmm1=tmp0L + punpckhwd xmm2,xmm4 ; xmm2=tmp0H + psrad xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1 + psrad xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1 + + movdqa xmm3,xmm5 ; xmm5=in2=z2 + punpcklwd xmm5,xmm0 ; xmm0=in6=z3 + punpckhwd xmm3,xmm0 + pmaddwd xmm5,[GOTOFF(ebx,PW_F184_MF076)] ; xmm5=tmp2L + pmaddwd xmm3,[GOTOFF(ebx,PW_F184_MF076)] ; xmm3=tmp2H + + movdqa xmm4,xmm1 + movdqa xmm0,xmm2 + paddd xmm1,xmm5 ; xmm1=tmp10L + paddd xmm2,xmm3 ; xmm2=tmp10H + psubd xmm4,xmm5 ; xmm4=tmp12L + psubd xmm0,xmm3 ; xmm0=tmp12H + + ; -- Final output stage + + movdqa xmm5,xmm1 + movdqa xmm3,xmm2 + paddd xmm1,xmm6 ; xmm1=data0L + paddd xmm2,xmm7 ; xmm2=data0H + psubd xmm5,xmm6 ; xmm5=data3L + psubd xmm3,xmm7 ; xmm3=data3H + + movdqa xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm6=[PD_DESCALE_P1_4] + + paddd xmm1,xmm6 + paddd xmm2,xmm6 + psrad xmm1,DESCALE_P1_4 + psrad xmm2,DESCALE_P1_4 + paddd xmm5,xmm6 + paddd xmm3,xmm6 + psrad xmm5,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw xmm1,xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) + packssdw xmm5,xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H + + movdqa xmm2,xmm4 + movdqa xmm3,xmm0 + paddd xmm4,xmm7 ; xmm4=data1L + paddd xmm0,xmm6 ; xmm0=data1H + psubd xmm2,xmm7 ; xmm2=data2L + psubd xmm3,xmm6 ; xmm3=data2H + + movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm7=[PD_DESCALE_P1_4] + + paddd xmm4,xmm7 + paddd xmm0,xmm7 + psrad xmm4,DESCALE_P1_4 + psrad xmm0,DESCALE_P1_4 + paddd xmm2,xmm7 + paddd xmm3,xmm7 + psrad xmm2,DESCALE_P1_4 + psrad xmm3,DESCALE_P1_4 + + packssdw 
xmm4,xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm2,xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) + + movdqa xmm6,xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1,xmm4 ; xmm1=(00 10 01 11 02 12 03 13) + punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm7,xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2,xmm5 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm7,xmm5 ; xmm7=(24 34 25 35 26 36 27 37) + + movdqa xmm0,xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1,xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31) + punpckhdq xmm0,xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) + movdqa xmm3,xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6,xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) + punpckhdq xmm3,xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. 
+ + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + pxor xmm4,xmm4 + punpcklwd xmm4,xmm1 ; xmm4=tmp0 + psrad xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 + + ; -- Odd part + + punpckhwd xmm1,xmm0 + punpckhwd xmm6,xmm3 + movdqa xmm5,xmm1 + movdqa xmm2,xmm6 + pmaddwd xmm1,[GOTOFF(ebx,PW_F256_F089)] ; xmm1=(tmp2) + pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2) + pmaddwd xmm5,[GOTOFF(ebx,PW_F106_MF217)] ; xmm5=(tmp0) + pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0) + + paddd xmm6,xmm1 ; xmm6=tmp2 + paddd xmm2,xmm5 ; xmm2=tmp0 + + ; -- Even part + + punpcklwd xmm0,xmm3 + pmaddwd xmm0,[GOTOFF(ebx,PW_F184_MF076)] ; xmm0=tmp2 + + movdqa xmm7,xmm4 + paddd xmm4,xmm0 ; xmm4=tmp10 + psubd xmm7,xmm0 ; xmm7=tmp12 + + ; -- Final output stage + + movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; xmm1=[PD_DESCALE_P2_4] + + movdqa xmm5,xmm4 + movdqa xmm3,xmm7 + paddd xmm4,xmm6 ; xmm4=data0=(00 10 20 30) + paddd xmm7,xmm2 ; xmm7=data1=(01 11 21 31) + psubd xmm5,xmm6 ; xmm5=data3=(03 13 23 33) + psubd xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + + paddd xmm4,xmm1 + paddd xmm7,xmm1 + psrad xmm4,DESCALE_P2_4 + psrad xmm7,DESCALE_P2_4 + paddd xmm5,xmm1 + paddd xmm3,xmm1 + psrad xmm5,DESCALE_P2_4 + psrad xmm3,DESCALE_P2_4 + + packssdw xmm4,xmm3 ; xmm4=(00 10 20 30 02 12 22 32) + packssdw xmm7,xmm5 ; xmm7=(01 11 21 31 03 13 23 33) + + movdqa xmm0,xmm4 ; transpose coefficients(phase 1) + punpcklwd xmm4,xmm7 ; xmm4=(00 01 10 11 20 21 30 31) + punpckhwd xmm0,xmm7 ; xmm0=(02 03 12 13 22 23 32 33) + + movdqa xmm6,xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4,xmm0 ; xmm4=(00 01 02 03 10 11 12 13) + punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) + + packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) + paddb xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)] + + pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) 
+ pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) + pshufd xmm3,xmm4,0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movd _DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movd _DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movd _DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 + movd _DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jpeg_idct_2x2_sse2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; Only rows and columns 0, 1, 3, 5 and 7 of the block contribute (see the diagrams in each pass). + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + + align 16 + global EXTN(jpeg_idct_2x2_sse2) + +EXTN(jpeg_idct_2x2_sse2): + push ebp + mov ebp,esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. 
+ + mov edx, POINTER [compptr(ebp)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(ebp)] ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) + ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) + + pcmpeqd xmm7,xmm7 ; xmm7=all ones + pslld xmm7,WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} + + movdqa xmm4,xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) + movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) + punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) + punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) + pmaddwd xmm4,[GOTOFF(ebx,PW_F362_MF127)] ; rows 1,3 terms (cols 0 1 ** 3) + pmaddwd xmm5,[GOTOFF(ebx,PW_F085_MF072)] ; rows 5,7 terms (cols 0 1 ** 3) + + psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) + pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) + psrld xmm2,WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) + pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) + por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) + por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) + pmaddwd xmm0,[GOTOFF(ebx,PW_F362_MF127)] ; rows 1,3 terms (cols 1 3 5 7) + pmaddwd xmm2,[GOTOFF(ebx,PW_F085_MF072)] ; rows 5,7 terms (cols 1 3 5 7) + + paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] + paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] + 
+ ; -- Even part + + movdqa xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm6=(00 01 ** 03 ** 05 ** 07) + + movdqa xmm1,xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) + pslld xmm6,WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) + pand xmm1,xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) + psrad xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] + psrad xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] + + ; -- Final output stage + + movdqa xmm3,xmm6 + movdqa xmm5,xmm1 + paddd xmm6,xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) + paddd xmm1,xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) + psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) + psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) + + movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; xmm2=[PD_DESCALE_P1_2] + + punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) + + movdqa xmm7,xmm1 + punpcklqdq xmm1,xmm5 ; xmm1=(A1 A3 B1 B3) + punpckhqdq xmm7,xmm5 ; xmm7=(A5 A7 B5 B7) + + paddd xmm6,xmm2 ; xmm6 += [PD_DESCALE_P1_2] + psrad xmm6,DESCALE_P1_2 ; descale (A0 B0) + + paddd xmm1,xmm2 + paddd xmm7,xmm2 + psrad xmm1,DESCALE_P1_2 + psrad xmm7,DESCALE_P1_2 + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. 
+ + mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(ebp)] ; eax=output_col + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) + packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) + pmaddwd xmm1,[GOTOFF(ebx,PW_F362_MF127)] ; (A1,A3) and (B1,B3) terms + pmaddwd xmm7,[GOTOFF(ebx,PW_F085_MF072)] ; (A5,A7) and (B5,B7) terms + + paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] + + ; -- Even part + + pslld xmm6,(CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] + + ; -- Final output stage + + movdqa xmm4,xmm6 + paddd xmm6,xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) + psubd xmm4,xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) + + punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) + + paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)] + psrad xmm6,DESCALE_P2_2 + + packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) + packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) + paddb xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; last use of the GOT pointer in ebx + + pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) + pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov WORD [edx+eax*SIZEOF_JSAMPLE], bx ; output row 0 <- (C0 D0) + mov WORD [esi+eax*SIZEOF_JSAMPLE], cx ; output row 1 <- (C1 D1) + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +%endif ; JIDCT_INT_SSE2_SUPPORTED +%endif ; IDCT_SCALING_SUPPORTED diff --git a/jisseflt.asm b/jisseflt.asm new file mode 100644 index 0000000..20eaeeb --- /dev/null +++ b/jisseflt.asm @@ -0,0 +1,582 @@ +; +; jisseflt.asm - floating-point IDCT (SSE & MMX) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler); +; it can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see jidctflt.c for more details. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +%ifdef DCT_FLOAT_SUPPORTED +%ifdef JIDCT_FLT_SSE_MMX_SUPPORTED + +; This module is specialized to the case DCTSIZE = 8. +; +%if DCTSIZE != 8 +%error "Sorry, this code only copes with 8x8 DCTs." +%endif + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1,%2,0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1,%2,0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + global EXTN(jconst_idct_float_sse) + +EXTN(jconst_idct_float_sse): + +PD_1_414 times 4 dd 1.414213562373095048801689 ; 2*c4 (ck = cos(k*pi/16)) +PD_1_847 times 4 dd 1.847759065022573512256366 ; 2*c2 +PD_1_082 times 4 dd 1.082392200292393968799446 ; 2*(c2-c6) +PD_M2_613 times 4 dd -2.613125929752753055713286 ; -2*(c2+c6) +PD_0_125 times 4 dd 0.125 ; 1/8 +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jpeg_idct_float_sse (j_decompress_ptr cinfo, jpeg_component_info * compptr, +; JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; Each pass runs DCTSIZE/4 loop iterations (see the ctr setup in ecx). + +%define cinfo(b) (b)+8 ; j_decompress_ptr cinfo +%define compptr(b) (b)+12 ; jpeg_component_info * compptr +%define coef_block(b) (b)+16 ; JCOEFPTR coef_block +%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf +%define output_col(b) (b)+24 ; JDIMENSION output_col + +%define original_ebp ebp+0 ; slot holding the address of the saved ebp +%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 16 + global EXTN(jpeg_idct_float_sse) + +EXTN(jpeg_idct_float_sse): + push ebp + mov eax,esp ; eax = address of saved (original) ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp],eax ; stash that address at [aligned ebp] + mov ebp,esp ; ebp = aligned ebp + lea esp, [workspace] ; reserve wk[] and workspace[] below ebp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ +; mov eax, [original_ebp] ; (skipped: eax is expected to still hold it from the prologue) + mov edx, POINTER [compptr(eax)] + mov edx, POINTER [jcompinfo_dct_table(edx)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT * wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 + test eax,eax + jnz short .columnDCT ; some AC term is nonzero -> full DCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm1,mm0 ; mm1=(** 02 ** 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in0H=(02 03) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) + cvtpi2ps xmm3,mm1 ; xmm3=(02 03 ** **) + cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) + movlhps xmm0,xmm3 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in0 + + movaps xmm1,xmm0 + movaps xmm2,xmm0 + movaps xmm3,xmm0 + + shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) + shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) + shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) + shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + 
movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn + alignx 16,7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm4,mm0 ; mm4=(** 02 ** 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) + punpckhwd mm5,mm1 ; mm5=(** 22 ** 23) + punpcklwd mm1,mm1 ; mm1=(20 20 21 21) + + psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in0H=(02 03) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) + cvtpi2ps xmm4,mm4 ; xmm4=(02 03 ** **) + cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) + psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in2H=(22 23) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in2L=(20 21) + cvtpi2ps xmm5,mm5 ; xmm5=(22 23 ** **) + cvtpi2ps xmm1,mm1 ; xmm1=(20 21 ** **) + + punpckhwd mm6,mm2 ; mm6=(** 42 ** 43) + punpcklwd mm2,mm2 ; mm2=(40 40 41 41) + punpckhwd mm7,mm3 ; mm7=(** 62 ** 63) + punpcklwd mm3,mm3 ; mm3=(60 60 61 61) + + psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in4H=(42 43) + psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in4L=(40 41) + cvtpi2ps xmm6,mm6 ; xmm6=(42 43 ** **) + cvtpi2ps xmm2,mm2 ; xmm2=(40 41 ** **) + psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in6H=(62 63) + psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in6L=(60 61) + cvtpi2ps xmm7,mm7 ; xmm7=(62 63 ** **) + cvtpi2ps xmm3,mm3 ; xmm3=(60 61 ** **) + + movlhps xmm0,xmm4 ; xmm0=in0=(00 01 02 03) + movlhps xmm1,xmm5 ; xmm1=in2=(20 21 22 23) + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movlhps xmm2,xmm6 ; xmm2=in4=(40 41 42 43) + movlhps xmm3,xmm7 ; xmm3=in6=(60 61 62 63) + mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 ; xmm1=(in2-in6) + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; 
xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] ; xmm1=(in2-in6)*1.414213562 + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpckhwd mm6,mm4 ; mm6=(** 12 ** 13) + punpcklwd mm4,mm4 ; mm4=(10 10 11 11) + punpckhwd mm2,mm0 ; mm2=(** 32 ** 33) + punpcklwd mm0,mm0 ; mm0=(30 30 31 31) + + psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in1H=(12 13) + psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in1L=(10 11) + cvtpi2ps xmm4,mm6 ; xmm4=(12 13 ** **) + cvtpi2ps xmm2,mm4 ; xmm2=(10 11 ** **) + psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in3H=(32 33) + psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in3L=(30 31) + cvtpi2ps xmm0,mm2 ; xmm0=(32 33 ** **) + cvtpi2ps xmm3,mm0 ; xmm3=(30 31 ** **) + + punpckhwd mm7,mm5 ; mm7=(** 52 ** 53) + punpcklwd mm5,mm5 ; mm5=(50 50 51 51) + punpckhwd mm3,mm1 ; mm3=(** 72 ** 73) + punpcklwd mm1,mm1 ; mm1=(70 70 71 71) + + movlhps xmm2,xmm4 ; xmm2=in1=(10 11 12 13) + movlhps xmm3,xmm0 ; xmm3=in3=(30 31 32 33) + + psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in5H=(52 53) + psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in5L=(50 51) + cvtpi2ps xmm4,mm7 ; xmm4=(52 53 ** **) + cvtpi2ps xmm5,mm5 ; xmm5=(50 51 ** **) + psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in7H=(72 73) + psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in7L=(70 71) + cvtpi2ps xmm0,mm3 ; xmm0=(72 73 ** **) + cvtpi2ps xmm1,mm1 ; xmm1=(70 71 ** **) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movlhps xmm5,xmm4 ; xmm5=in5=(50 51 52 53) + movlhps xmm1,xmm0 ; xmm1=in7=(70 71 72 73) + mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + 
mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 ; xmm2=(z11-z13) + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 ; xmm0=(z10+z12) + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) + movaps xmm3,xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm0,xmm7 + movaps xmm3,xmm5 + addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2,xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) + movaps xmm4,xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3,xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6,xmm7 ; 
xmm6=(00 10 20 30) + unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) + movaps xmm0,xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6,xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) + movaps xmm3,xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; inptr += 4 coefficients + add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr += 4 multipliers + add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr += 4*DCTSIZE floats + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] ; -8: esi already advanced by 2*4 coefficients + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov eax, [original_ebp] ; eax = address of saved (original) ebp + lea esi, [workspace] ; FAST_FLOAT * wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] ; eax = output_col + mov ecx, DCTSIZE/4 ; ctr + alignx 16,7 +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm0 + movaps xmm5,xmm1 + subps xmm0,xmm2 ; xmm0=tmp11 + subps xmm1,xmm3 + addps xmm4,xmm2 ; xmm4=tmp10 + addps xmm5,xmm3 ; xmm5=tmp13 + + mulps xmm1,[GOTOFF(ebx,PD_1_414)] + subps xmm1,xmm5 ; xmm1=tmp12 + + movaps xmm6,xmm4 + movaps xmm7,xmm0 + subps xmm4,xmm5 ; xmm4=tmp3 + subps xmm0,xmm1 ; xmm0=tmp2 + addps xmm6,xmm5 ; xmm6=tmp0 + addps xmm7,xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4,xmm2 + movaps xmm0,xmm5 + addps xmm2,xmm1 ; xmm2=z11 + addps xmm5,xmm3 ; xmm5=z13 + subps xmm4,xmm1 ; xmm4=z12 + subps xmm0,xmm3 ; xmm0=z10 + + movaps xmm1,xmm2 + subps xmm2,xmm5 + addps xmm1,xmm5 ; xmm1=tmp7 + + mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3,xmm0 + addps xmm0,xmm4 + mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3,xmm0 ; xmm3=tmp12 + subps xmm4,xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3,xmm1 ; xmm3=tmp6 + movaps xmm5,xmm6 + movaps xmm0,xmm7 + addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) + subps 
xmm0,xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2,xmm3 ; xmm2=tmp5 + + movaps xmm1,[GOTOFF(ebx,PD_0_125)] ; xmm1=[PD_0_125] + + mulps xmm6,xmm1 ; descale(1/8) + mulps xmm7,xmm1 ; descale(1/8) + mulps xmm5,xmm1 ; descale(1/8) + mulps xmm0,xmm1 ; descale(1/8) + + movhlps xmm3,xmm6 ; xmm3 low half = data0 high half (20 30) + movhlps xmm1,xmm7 ; xmm1 low half = data1 high half (21 31) + cvtps2pi mm0,xmm6 ; round to int32, mm0=data0L=(00 10) + cvtps2pi mm1,xmm7 ; round to int32, mm1=data1L=(01 11) + cvtps2pi mm2,xmm3 ; round to int32, mm2=data0H=(20 30) + cvtps2pi mm3,xmm1 ; round to int32, mm3=data1H=(21 31) + packssdw mm0,mm2 ; mm0=data0=(00 10 20 30) + packssdw mm1,mm3 ; mm1=data1=(01 11 21 31) + + movhlps xmm6,xmm5 + movhlps xmm7,xmm0 + cvtps2pi mm4,xmm5 ; round to int32, mm4=data7L=(07 17) + cvtps2pi mm5,xmm0 ; round to int32, mm5=data6L=(06 16) + cvtps2pi mm6,xmm6 ; round to int32, mm6=data7H=(27 37) + cvtps2pi mm7,xmm7 ; round to int32, mm7=data6H=(26 36) + packssdw mm4,mm6 ; mm4=data7=(07 17 27 37) + packssdw mm5,mm7 ; mm5=data6=(06 16 26 36) + + packsswb mm0,mm5 ; mm0=(00 10 20 30 06 16 26 36) + packsswb mm1,mm4 ; mm1=(01 11 21 31 07 17 27 37) + + movaps xmm3, XMMWORD [wk(0)] ; xmm3=tmp2 + movaps xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movaps xmm6,[GOTOFF(ebx,PD_0_125)] ; xmm6=[PD_0_125] + + addps xmm4,xmm2 ; xmm4=tmp4 + movaps xmm5,xmm3 + movaps xmm0,xmm1 + addps xmm3,xmm2 ; xmm3=data2=(02 12 22 32) + addps xmm1,xmm4 ; xmm1=data4=(04 14 24 34) + subps xmm5,xmm2 ; xmm5=data5=(05 15 25 35) + subps xmm0,xmm4 ; xmm0=data3=(03 13 23 33) + + mulps xmm3,xmm6 ; descale(1/8) + mulps xmm1,xmm6 ; descale(1/8) + mulps xmm5,xmm6 ; descale(1/8) + mulps xmm0,xmm6 ; descale(1/8) + + movhlps xmm7,xmm3 + movhlps xmm2,xmm1 + cvtps2pi mm2,xmm3 ; round to int32, mm2=data2L=(02 12) + cvtps2pi mm3,xmm1 ; round to int32, mm3=data4L=(04 14) + cvtps2pi mm6,xmm7 ; round to int32, mm6=data2H=(22 32) + cvtps2pi mm7,xmm2 ; round to int32, mm7=data4H=(24 34) + packssdw mm2,mm6 ; mm2=data2=(02 12 22 32) + packssdw mm3,mm7 ; mm3=data4=(04 14 24 34) + + movhlps xmm4,xmm5 + 
movhlps xmm6,xmm0 + cvtps2pi mm5,xmm5 ; round to int32, mm5=data5L=(05 15) + cvtps2pi mm4,xmm0 ; round to int32, mm4=data3L=(03 13) + cvtps2pi mm6,xmm4 ; round to int32, mm6=data5H=(25 35) + cvtps2pi mm7,xmm6 ; round to int32, mm7=data3H=(23 33) + packssdw mm5,mm6 ; mm5=data5=(05 15 25 35) + packssdw mm4,mm7 ; mm4=data3=(03 13 23 33) + + movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] + + packsswb mm2,mm3 ; mm2=(02 12 22 32 04 14 24 34) + packsswb mm4,mm5 ; mm4=(03 13 23 33 05 15 25 35) + + paddb mm0,mm6 ; add CENTERJSAMPLE to every byte + paddb mm1,mm6 + paddb mm2,mm6 + paddb mm4,mm6 + + movq mm7,mm0 ; transpose coefficients(phase 1) + punpcklbw mm0,mm1 ; mm0=(00 01 10 11 20 21 30 31) + punpckhbw mm7,mm1 ; mm7=(06 07 16 17 26 27 36 37) + movq mm3,mm2 ; transpose coefficients(phase 1) + punpcklbw mm2,mm4 ; mm2=(02 03 12 13 22 23 32 33) + punpckhbw mm3,mm4 ; mm3=(04 05 14 15 24 25 34 35) + + movq mm5,mm0 ; transpose coefficients(phase 2) + punpcklwd mm0,mm2 ; mm0=(00 01 02 03 10 11 12 13) + punpckhwd mm5,mm2 ; mm5=(20 21 22 23 30 31 32 33) + movq mm6,mm3 ; transpose coefficients(phase 2) + punpcklwd mm3,mm7 ; mm3=(04 05 06 07 14 15 16 17) + punpckhwd mm6,mm7 ; mm6=(24 25 26 27 34 35 36 37) + + movq mm1,mm0 ; transpose coefficients(phase 3) + punpckldq mm0,mm3 ; mm0=(00 01 02 03 04 05 06 07) + punpckhdq mm1,mm3 ; mm1=(10 11 12 13 14 15 16 17) + movq mm4,mm5 ; transpose coefficients(phase 3) + punpckldq mm5,mm6 ; mm5=(20 21 22 23 24 25 26 27) + punpckhdq mm4,mm6 ; mm4=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address (ebx is reused as a row pointer below) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 
4*SIZEOF_JSAMPROW ; advance to the next 4 output row pointers + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp,ebp ; esp <- aligned ebp + pop esp ; esp <- address of saved (original) ebp + pop ebp ; ebp <- value pushed on entry + ret + +%endif ; JIDCT_FLT_SSE_MMX_SUPPORTED +%endif ; DCT_FLOAT_SUPPORTED diff --git a/jmemmgr.c b/jmemmgr.c index d801b32..e3149e5 100644 --- a/jmemmgr.c +++ b/jmemmgr.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : January 27, 2004 + * --------------------------------------------------------------------- + * * This file contains the JPEG system-independent memory management * routines. This code is usable across a wide variety of machines; most * of the system dependencies have been isolated in a separate file. @@ -51,27 +58,12 @@ extern char * getenv JPP((const char * name)); /* - * Many machines require storage alignment: longs must start on 4-byte - * boundaries, doubles on 8-byte boundaries, etc. On such machines, malloc() - * always returns pointers that are multiples of the worst-case alignment - * requirement, and we had better do so too. - * There isn't any really portable way to determine the worst-case alignment - * requirement. This module assumes that the alignment requirement is - * multiples of sizeof(ALIGN_TYPE). - * By default, we define ALIGN_TYPE as double. This is necessary on some - * workstations (where doubles really do need 8-byte alignment) and will work - * fine on nearly everything. If your machine has lesser alignment needs, - * you can save a few bytes by making ALIGN_TYPE smaller. 
- * The only place I know of where this will NOT work is certain Macintosh - * 680x0 compilers that define double as a 10-byte IEEE extended float. - * Doing 10-byte alignment is counterproductive because longwords won't be - * aligned well. Put "#define ALIGN_TYPE long" in jconfig.h if you have - * such a compiler. + * SIMD Ext: Most SSE/SSE2 instructions require their memory operands to + * be aligned to a 16-byte boundary; if not, a general-protection exception + * (#GP) is generated. */ -#ifndef ALIGN_TYPE /* so can override from jconfig.h */ -#define ALIGN_TYPE double -#endif +#define ALIGN_SIZE 16 /* sizeof SSE/SSE2 register */ /* @@ -81,31 +73,24 @@ extern char * getenv JPP((const char * name)); * header with a link to the next pool of the same class. * Small and large pool headers are identical except that the latter's * link pointer must be FAR on 80x86 machines. - * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE - * field. This forces the compiler to make SIZEOF(small_pool_hdr) a multiple - * of the alignment requirement of ALIGN_TYPE. 
*/ -typedef union small_pool_struct * small_pool_ptr; +typedef struct small_pool_struct * small_pool_ptr; -typedef union small_pool_struct { - struct { - small_pool_ptr next; /* next in list of pools */ - size_t bytes_used; /* how many bytes already used within pool */ - size_t bytes_left; /* bytes still available in this pool */ - } hdr; - ALIGN_TYPE dummy; /* included in union to ensure alignment */ +typedef struct small_pool_struct { + small_pool_ptr next; /* next in list of pools */ + size_t bytes_used; /* how many bytes already used within pool */ + size_t bytes_left; /* bytes still available in this pool */ + char dummy[ALIGN_SIZE-1]; } small_pool_hdr; -typedef union large_pool_struct FAR * large_pool_ptr; +typedef struct large_pool_struct FAR * large_pool_ptr; -typedef union large_pool_struct { - struct { - large_pool_ptr next; /* next in list of pools */ - size_t bytes_used; /* how many bytes already used within pool */ - size_t bytes_left; /* bytes still available in this pool */ - } hdr; - ALIGN_TYPE dummy; /* included in union to ensure alignment */ +typedef struct large_pool_struct { + large_pool_ptr next; /* next in list of pools */ + size_t bytes_used; /* how many bytes already used within pool */ + size_t bytes_left; /* bytes still available in this pool */ + char dummy[ALIGN_SIZE-1]; } large_pool_hdr; @@ -197,16 +182,16 @@ print_mem_stats (j_common_ptr cinfo, int pool_id) pool_id, mem->total_space_allocated); for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL; - lhdr_ptr = lhdr_ptr->hdr.next) { + lhdr_ptr = lhdr_ptr->next) { fprintf(stderr, " Large chunk used %ld\n", - (long) lhdr_ptr->hdr.bytes_used); + (long) lhdr_ptr->bytes_used); } for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL; - shdr_ptr = shdr_ptr->hdr.next) { + shdr_ptr = shdr_ptr->next) { fprintf(stderr, " Small chunk used %ld free %ld\n", - (long) shdr_ptr->hdr.bytes_used, - (long) shdr_ptr->hdr.bytes_left); + (long) shdr_ptr->bytes_used, + (long) shdr_ptr->bytes_left); 
} } @@ -266,10 +251,10 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr))) out_of_memory(cinfo, 1); /* request exceeds malloc's ability */ - /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */ - odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE); + /* Round up the requested size to a multiple of ALIGN_SIZE */ + odd_bytes = sizeofobject % ALIGN_SIZE; if (odd_bytes > 0) - sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes; + sizeofobject += ALIGN_SIZE - odd_bytes; /* See if space is available in any existing pool */ if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) @@ -277,10 +262,10 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) prev_hdr_ptr = NULL; hdr_ptr = mem->small_list[pool_id]; while (hdr_ptr != NULL) { - if (hdr_ptr->hdr.bytes_left >= sizeofobject) + if (hdr_ptr->bytes_left >= sizeofobject) break; /* found pool with enough space */ prev_hdr_ptr = hdr_ptr; - hdr_ptr = hdr_ptr->hdr.next; + hdr_ptr = hdr_ptr->next; } /* Time to make a new pool? */ @@ -305,20 +290,20 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) } mem->total_space_allocated += min_request + slop; /* Success, initialize the new pool header and add to end of list */ - hdr_ptr->hdr.next = NULL; - hdr_ptr->hdr.bytes_used = 0; - hdr_ptr->hdr.bytes_left = sizeofobject + slop; + hdr_ptr->next = NULL; + hdr_ptr->bytes_used = 0; + hdr_ptr->bytes_left = sizeofobject + slop; if (prev_hdr_ptr == NULL) /* first pool in class? 
*/ mem->small_list[pool_id] = hdr_ptr; else - prev_hdr_ptr->hdr.next = hdr_ptr; + prev_hdr_ptr->next = hdr_ptr; } /* OK, allocate the object from the current pool */ - data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */ - data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */ - hdr_ptr->hdr.bytes_used += sizeofobject; - hdr_ptr->hdr.bytes_left -= sizeofobject; + data_ptr = (char *) ((size_t) (hdr_ptr + 1) & -ALIGN_SIZE); + data_ptr += hdr_ptr->bytes_used; /* point to place for object */ + hdr_ptr->bytes_used += sizeofobject; + hdr_ptr->bytes_left -= sizeofobject; return (void *) data_ptr; } @@ -350,10 +335,10 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr))) out_of_memory(cinfo, 3); /* request exceeds malloc's ability */ - /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */ - odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE); + /* Round up the requested size to a multiple of ALIGN_SIZE */ + odd_bytes = sizeofobject % ALIGN_SIZE; if (odd_bytes > 0) - sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes; + sizeofobject += ALIGN_SIZE - odd_bytes; /* Always make a new pool */ if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) @@ -366,15 +351,15 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr); /* Success, initialize the new pool header and add to list */ - hdr_ptr->hdr.next = mem->large_list[pool_id]; + hdr_ptr->next = mem->large_list[pool_id]; /* We maintain space counts in each pool header for statistical purposes, * even though they are not needed for allocation. 
*/ - hdr_ptr->hdr.bytes_used = sizeofobject; - hdr_ptr->hdr.bytes_left = 0; + hdr_ptr->bytes_used = sizeofobject; + hdr_ptr->bytes_left = 0; mem->large_list[pool_id] = hdr_ptr; - return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */ + return (void FAR *) ((size_t) (hdr_ptr + 1) & -ALIGN_SIZE); } @@ -401,6 +386,12 @@ alloc_sarray (j_common_ptr cinfo, int pool_id, JSAMPROW workspace; JDIMENSION rowsperchunk, currow, i; long ltemp; + JDIMENSION odd_samples; + + /* Round up the row bytes to a multiple of ALIGN_SIZE */ + odd_samples = samplesperrow % (ALIGN_SIZE / SIZEOF(JSAMPLE)); + if (odd_samples > 0) + samplesperrow += (ALIGN_SIZE / SIZEOF(JSAMPLE)) - odd_samples; /* Calculate max # of rows allowed in one allocation chunk */ ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) / @@ -968,9 +959,9 @@ free_pool (j_common_ptr cinfo, int pool_id) mem->large_list[pool_id] = NULL; while (lhdr_ptr != NULL) { - large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next; - space_freed = lhdr_ptr->hdr.bytes_used + - lhdr_ptr->hdr.bytes_left + + large_pool_ptr next_lhdr_ptr = lhdr_ptr->next; + space_freed = lhdr_ptr->bytes_used + + lhdr_ptr->bytes_left + SIZEOF(large_pool_hdr); jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed); mem->total_space_allocated -= space_freed; @@ -982,9 +973,9 @@ free_pool (j_common_ptr cinfo, int pool_id) mem->small_list[pool_id] = NULL; while (shdr_ptr != NULL) { - small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next; - space_freed = shdr_ptr->hdr.bytes_used + - shdr_ptr->hdr.bytes_left + + small_pool_ptr next_shdr_ptr = shdr_ptr->next; + space_freed = shdr_ptr->bytes_used + + shdr_ptr->bytes_left + SIZEOF(small_pool_hdr); jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed); mem->total_space_allocated -= space_freed; @@ -1035,22 +1026,22 @@ jinit_memory_mgr (j_common_ptr cinfo) cinfo->mem = NULL; /* for safety if init fails */ /* Check for configuration errors. 
- * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably + * ALIGN_SIZE should be a power of 2; otherwise, it probably * doesn't reflect any real hardware alignment requirement. * The test is a little tricky: for X>0, X and X-1 have no one-bits * in common if and only if X is a power of 2, ie has only one one-bit. * Some compilers may give an "unreachable code" warning here; ignore it. */ - if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0) + if ((ALIGN_SIZE & (ALIGN_SIZE-1)) != 0) ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE); /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be - * a multiple of SIZEOF(ALIGN_TYPE). + * a multiple of ALIGN_SIZE. * Again, an "unreachable code" warning may be ignored here. * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK. */ test_mac = (size_t) MAX_ALLOC_CHUNK; if ((long) test_mac != MAX_ALLOC_CHUNK || - (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0) + (MAX_ALLOC_CHUNK % ALIGN_SIZE) != 0) ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK); max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */ diff --git a/jmorecfg.h b/jmorecfg.h index 54a7d1c..b425519 100644 --- a/jmorecfg.h +++ b/jmorecfg.h @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : March 28, 2005 + * --------------------------------------------------------------------- + * * This file contains additional configuration options that customize the * JPEG software for special applications or support machine-dependent * optimizations. Most users will not need to touch this file. @@ -20,7 +27,9 @@ * We do not support run-time selection of data precision, sorry. 
*/ -#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ +/* SIMD Ext: This SIMD code only copes with 8-bit sample values. */ + +#define BITS_IN_JSAMPLE 8 /* SIMD Ext: cannot be changed! */ /* @@ -157,7 +166,8 @@ typedef short INT16; /* INT32 must hold at least signed 32-bit values. */ -#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ + /* X11/xmd.h and basetsd.h (Win32 SDK) correctly define INT32 */ +#if !defined(XMD_H) && !defined(_BASETSD_H_) && !defined(_BASETSD_H) typedef long INT32; #endif @@ -180,14 +190,24 @@ typedef unsigned int JDIMENSION; * or code profilers that require it. */ +#if defined(_MSC_VER) || defined(__BORLANDC__) || \ + defined(__WATCOMC__) || defined(__MWERKS__) || \ + defined(__ICC) || defined(__INTEL_COMPILER) +#define JCDECL __cdecl +#elif defined(__GNUC__) +#define JCDECL __attribute__((__cdecl__)) +#else +#define JCDECL +#endif + /* a function called through method pointers: */ -#define METHODDEF(type) static type +#define METHODDEF(type) static type JCDECL /* a function used only in its module: */ #define LOCAL(type) static type /* a function referenced thru EXTERNs: */ -#define GLOBAL(type) type +#define GLOBAL(type) type JCDECL /* a reference to a GLOBAL function: */ -#define EXTERN(type) extern type +#define EXTERN(type) extern type JCDECL /* This macro is used to declare a "method", that is, a function pointer. @@ -197,9 +217,9 @@ typedef unsigned int JDIMENSION; */ #ifdef HAVE_PROTOTYPES -#define JMETHOD(type,methodname,arglist) type (*methodname) arglist +#define JMETHOD(type,methodname,arglist) type (JCDECL *methodname) arglist #else -#define JMETHOD(type,methodname,arglist) type (*methodname) () +#define JMETHOD(type,methodname,arglist) type (JCDECL *methodname) () #endif @@ -209,11 +229,13 @@ typedef unsigned int JDIMENSION; * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol. 
*/ +#ifndef FAR #ifdef NEED_FAR_POINTERS #define FAR far #else #define FAR #endif +#endif /* !FAR */ /* @@ -224,8 +246,14 @@ typedef unsigned int JDIMENSION; */ #ifndef HAVE_BOOLEAN -typedef int boolean; +#ifdef TYPEDEF_UCHAR_BOOLEAN +#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */ +typedef unsigned char boolean; #endif +#else /* !TYPEDEF_UCHAR_BOOLEAN */ +typedef int boolean; +#endif /* TYPEDEF_UCHAR_BOOLEAN */ +#endif /* !HAVE_BOOLEAN */ #ifndef FALSE /* in case these macros already exist */ #define FALSE 0 /* values of boolean */ #endif @@ -290,6 +318,7 @@ typedef int boolean; #define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ #undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */ #define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */ +#define UPSAMPLE_H1V2_SUPPORTED /* Fast/fancy processing for 1h2v? */ #define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ #define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ @@ -316,6 +345,84 @@ typedef int boolean; #define RGB_BLUE 2 /* Offset of Blue */ #define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ +#undef RGBX_FILLER_0XFF /* fill dummy bytes with 0xFF in RGBX format */ + + +/* SIMD support options: */ + +#ifndef JSIMD_MMX_NOT_SUPPORTED +#define JSIMD_ENCODER_MMX_SUPPORTED /* Use MMX in encoding process */ +#define JSIMD_DECODER_MMX_SUPPORTED /* Use MMX in decoding process */ +#endif +#ifndef JSIMD_3DNOW_NOT_SUPPORTED +#define JSIMD_ENCODER_3DNOW_SUPPORTED /* Use 3DNow! in encoding process */ +#define JSIMD_DECODER_3DNOW_SUPPORTED /* Use 3DNow! 
in decoding process */ +#endif +#ifndef JSIMD_SSE_NOT_SUPPORTED +#define JSIMD_ENCODER_SSE_SUPPORTED /* Use SSE in encoding process */ +#define JSIMD_DECODER_SSE_SUPPORTED /* Use SSE in decoding process */ +#endif +#ifndef JSIMD_SSE2_NOT_SUPPORTED +#define JSIMD_ENCODER_SSE2_SUPPORTED /* Use SSE2 in encoding process */ +#define JSIMD_DECODER_SSE2_SUPPORTED /* Use SSE2 in decoding process */ +#endif + +/* (encoder part): */ + +#undef JFDCT_INT_QUANTIZE_WITH_DIVISION /* Use general quantization method */ + +#if defined(JSIMD_ENCODER_MMX_SUPPORTED) +#define JCCOLOR_RGBYCC_MMX_SUPPORTED /* RGB->YCC conversion with MMX */ +#define JCSAMPLE_MMX_SUPPORTED /* downsampling with MMX */ +#define JFDCT_INT_MMX_SUPPORTED /* forward DCT with MMX */ +#endif +#if defined(JSIMD_ENCODER_SSE2_SUPPORTED) +#define JCCOLOR_RGBYCC_SSE2_SUPPORTED /* RGB->YCC conversion with SSE2 */ +#define JCSAMPLE_SSE2_SUPPORTED /* downsampling with SSE2 */ +#define JFDCT_INT_SSE2_SUPPORTED /* forward DCT with SSE2 */ +#endif +#if defined(JSIMD_ENCODER_3DNOW_SUPPORTED) && \ + defined(JSIMD_ENCODER_MMX_SUPPORTED) +#define JFDCT_FLT_3DNOW_MMX_SUPPORTED /* forward DCT with 3DNow!/MMX */ +#endif +#if defined(JSIMD_ENCODER_SSE_SUPPORTED) && \ + defined(JSIMD_ENCODER_MMX_SUPPORTED) +#define JFDCT_FLT_SSE_MMX_SUPPORTED /* forward DCT with SSE/MMX */ +#endif +#if defined(JSIMD_ENCODER_SSE_SUPPORTED) && \ + defined(JSIMD_ENCODER_SSE2_SUPPORTED) +#define JFDCT_FLT_SSE_SSE2_SUPPORTED /* forward DCT with SSE/SSE2 */ +#endif + +/* (decoder part): */ + +#if defined(JSIMD_DECODER_MMX_SUPPORTED) +#define JDCOLOR_YCCRGB_MMX_SUPPORTED /* YCC->RGB conversion with MMX */ +#define JDMERGE_MMX_SUPPORTED /* merged upsampling with MMX */ +#define JDSAMPLE_FANCY_MMX_SUPPORTED /* fancy upsampling with MMX */ +#define JDSAMPLE_SIMPLE_MMX_SUPPORTED /* sloppy upsampling with MMX */ +#define JIDCT_INT_MMX_SUPPORTED /* inverse DCT with MMX */ +#endif +#if defined(JSIMD_DECODER_SSE2_SUPPORTED) +#define JDCOLOR_YCCRGB_SSE2_SUPPORTED /* 
YCC->RGB conversion with SSE2 */ +#define JDMERGE_SSE2_SUPPORTED /* merged upsampling with SSE2 */ +#define JDSAMPLE_FANCY_SSE2_SUPPORTED /* fancy upsampling with SSE2 */ +#define JDSAMPLE_SIMPLE_SSE2_SUPPORTED /* sloppy upsampling with SSE2 */ +#define JIDCT_INT_SSE2_SUPPORTED /* inverse DCT with SSE2 */ +#endif +#if defined(JSIMD_DECODER_3DNOW_SUPPORTED) && \ + defined(JSIMD_DECODER_MMX_SUPPORTED) +#define JIDCT_FLT_3DNOW_MMX_SUPPORTED /* inverse DCT with 3DNow!/MMX */ +#endif +#if defined(JSIMD_DECODER_SSE_SUPPORTED) && \ + defined(JSIMD_DECODER_MMX_SUPPORTED) +#define JIDCT_FLT_SSE_MMX_SUPPORTED /* inverse DCT with SSE/MMX */ +#endif +#if defined(JSIMD_DECODER_SSE_SUPPORTED) && \ + defined(JSIMD_DECODER_SSE2_SUPPORTED) +#define JIDCT_FLT_SSE_SSE2_SUPPORTED /* inverse DCT with SSE/SSE2 */ +#endif + /* Definitions for speed-related optimizations. */ @@ -328,6 +435,9 @@ typedef int boolean; #ifdef __GNUC__ /* for instance, GNU C knows about inline */ #define INLINE __inline__ #endif +#ifdef _MSC_VER +#define INLINE __inline +#endif #ifndef INLINE #define INLINE /* default is to define it as empty */ #endif diff --git a/jpegdll.def b/jpegdll.def new file mode 100644 index 0000000..5a86cd6 --- /dev/null +++ b/jpegdll.def @@ -0,0 +1,73 @@ +; +; jpegdll.def - module definition file for Win32 DLL +; + +; sed -e "/\(jinit\|jpeg_simd_\(cpu\|os\|merged\)\)/d" -e "s/^EXTERN(..*) \([_A-Za-z][_A-Za-z0-9]*\).*/ \1/p" -e d jpeglib.h jpegint.h + +EXPORTS + ; API functions in jpeglib.h, which are intended + ; to be called by the user applications. 
+ jpeg_std_error + jpeg_CreateCompress + jpeg_CreateDecompress + jpeg_destroy_compress + jpeg_destroy_decompress + jpeg_stdio_dest + jpeg_stdio_src + jpeg_set_defaults + jpeg_set_colorspace + jpeg_default_colorspace + jpeg_set_quality + jpeg_set_linear_quality + jpeg_add_quant_table + jpeg_quality_scaling + jpeg_simple_progression + jpeg_suppress_tables + jpeg_alloc_quant_table + jpeg_alloc_huff_table + jpeg_start_compress + jpeg_write_scanlines + jpeg_finish_compress + jpeg_write_raw_data + jpeg_write_marker + jpeg_write_m_header + jpeg_write_m_byte + jpeg_write_tables + jpeg_read_header + jpeg_start_decompress + jpeg_read_scanlines + jpeg_finish_decompress + jpeg_read_raw_data + jpeg_has_multiple_scans + jpeg_start_output + jpeg_finish_output + jpeg_input_complete + jpeg_new_colormap + jpeg_consume_input + jpeg_calc_output_dimensions + jpeg_save_markers + jpeg_set_marker_processor + jpeg_read_coefficients + jpeg_write_coefficients + jpeg_copy_critical_parameters + jpeg_abort_compress + jpeg_abort_decompress + jpeg_abort + jpeg_destroy + jpeg_resync_to_restart + ; Functions that are introduced by SIMD extension. + jpeg_simd_support + jpeg_simd_mask + jpeg_simd_color_converter + jpeg_simd_downsampler + jpeg_simd_forward_dct + jpeg_simd_color_deconverter + jpeg_simd_upsampler + jpeg_simd_inverse_dct + ; Utility functions in jutils.c. + ; These are needed by some applications. 
+ jdiv_round_up + jround_up + jcopy_sample_rows + jcopy_block_row + jzero_far diff --git a/jpegdll.rc b/jpegdll.rc new file mode 100644 index 0000000..fb3d327 --- /dev/null +++ b/jpegdll.rc @@ -0,0 +1,57 @@ +// +// jpegdll.rc - version information for Win32 DLL +// + +// from <winver.h> +#define VS_VERSION_INFO 1 +#define VS_FFI_FILEFLAGSMASK 0x0000003FL +#define VS_FF_DEBUG 0x00000001L +#define VOS__WINDOWS32 0x00000004L +#define VFT_DLL 0x00000002L +#define VFT2_UNKNOWN 0x00000000L + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 6,2,1,2 + PRODUCTVERSION 6,2,1,2 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +#ifdef _DEBUG + FILEFLAGS VS_FF_DEBUG +#else + FILEFLAGS 0x00000000L +#endif + FILEOS VOS__WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE VFT2_UNKNOWN +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "00000000" + BEGIN + VALUE "LegalCopyright", "Copyright (C) 1991-1998 Thomas G. Lane\0" + VALUE "FileDescription", "Independent JPEG Group's JPEG Library" + " with SIMD support\0" + VALUE "ProductName", "The Independent JPEG Group's JPEG software" + " release 6b with x86 SIMD extension for" + " IJG JPEG library version 1.02\0" + VALUE "Comments", "This is not an official binary from IJG. " + "The SIMD code in this DLL is copyright (C)" + " 1999-2006 MIYASAKA Masaru.\0" + VALUE "FileVersion", "6.2.1.02\0" + VALUE "ProductVersion", "6.2.1.02\0" + VALUE "OriginalFilename", "jpeg62.dll\0" + VALUE "InternalName", "jpeg62\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0, 0 + END +END + +///////////////////////////////////////////////////////////////////////////// diff --git a/jpegint.h b/jpegint.h index 95b00d4..511e07c 100644 --- a/jpegint.h +++ b/jpegint.h @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file.
* + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : February 4, 2006 + * --------------------------------------------------------------------- + * * This file provides common declarations for the various JPEG modules. * These declarations are considered internal to the JPEG library; most * applications using the library shouldn't need to include this file. @@ -291,6 +298,19 @@ struct jpeg_color_quantizer { #endif +/* SIMD Ext: This macro checks if constants for SSE/SSE2 instructions are + * aligned to a 16-byte boundary. Most of SSE/SSE2 instructions require + * that the memory operand is aligned to a 16-byte boundary; if not, + * a general-protection exception (#GP) is generated. + */ + +#ifdef JSIMD_NO_SSECONST_ALIGNMENT_CHECK +#define IS_CONST_ALIGNED_16(p) (1) +#else +#define IS_CONST_ALIGNED_16(p) (((unsigned)(p) & 0x0F) == 0) +#endif + + /* Short forms of external names for systems with brain-damaged linkers. */ #ifdef NEED_SHORT_EXTERNAL_NAMES @@ -327,6 +347,8 @@ struct jpeg_color_quantizer { #define jzero_far jZeroFar #define jpeg_zigzag_order jZIGTable #define jpeg_natural_order jZAGTable +#define jpeg_simd_cpu_support jSiCpuSupport +#define jpeg_simd_os_support jSiOsSupport #endif /* NEED_SHORT_EXTERNAL_NAMES */ @@ -382,6 +404,10 @@ extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */ #endif extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */ +/* SIMD Ext: retrieve SIMD/CPU information */ +EXTERN(unsigned int) jpeg_simd_cpu_support JPP((void)); +EXTERN(unsigned int) jpeg_simd_os_support JPP((unsigned int simd)); + /* Suppress undefined-structure complaints if necessary. 
*/ #ifdef INCOMPLETE_TYPES_BROKEN diff --git a/jpeglib.h b/jpeglib.h index d1be8dd..0506316 100644 --- a/jpeglib.h +++ b/jpeglib.h @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified for SIMD extension. + * Last Modified : February 4, 2006 + * --------------------------------------------------------------------- + * * This file defines the application interface for the JPEG library. * Most applications using the library need only include this file, * and perhaps jerror.h if they want to know the exact error codes. @@ -13,6 +20,10 @@ #ifndef JPEGLIB_H #define JPEGLIB_H +#ifdef __cplusplus +extern "C" { +#endif + /* * First we include the configuration files that record how this * installation of the JPEG library is set up. jconfig.h can be @@ -33,6 +44,13 @@ #define JPEG_LIB_VERSION 62 /* Version 6b */ +/* SIMD Ext: Version ID for the SIMD extension. + */ + +#define JPEG_SIMDEXT_VERSION 102 /* version 1.02 */ +#define JPEG_SIMDEXT_VER_STR "1.02" + + /* Various constants determining the sizes of things. * All of these are specified by the JPEG standard, so don't change them * if you want to be compatible. @@ -235,6 +253,15 @@ typedef enum { JDITHER_FS /* Floyd-Steinberg error diffusion dither */ } J_DITHER_MODE; +/* SIMD Ext: bitflags for jpeg_simd_support() and jpeg_simd_mask() */ + +#define JSIMD_NONE 0x00 +#define JSIMD_MMX 0x01 +#define JSIMD_3DNOW 0x02 +#define JSIMD_SSE 0x04 +#define JSIMD_SSE2 0x08 +#define JSIMD_ALL (JSIMD_MMX | JSIMD_3DNOW | JSIMD_SSE | JSIMD_SSE2) + /* Common fields between JPEG compression and decompression master structs. 
*/ @@ -877,6 +904,18 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo)); #define jpeg_abort jAbort #define jpeg_destroy jDestroy #define jpeg_resync_to_restart jResyncRestart +#define jpeg_simd_support jSiSupport +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED +#define jpeg_simd_mask jSiMask +#endif +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED +#define jpeg_simd_color_converter jSiCColor +#define jpeg_simd_downsampler jSiDownsampler +#define jpeg_simd_forward_dct jSiFDCT +#define jpeg_simd_color_deconverter jSiDColor +#define jpeg_simd_upsampler jSiUpsampler +#define jpeg_simd_inverse_dct jSiIDCT +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ #endif /* NEED_SHORT_EXTERNAL_NAMES */ @@ -1037,6 +1076,24 @@ EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo)); EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo, int desired)); +/* SIMD Ext: retrieve SIMD/CPU information */ +EXTERN(unsigned int) jpeg_simd_support JPP((j_common_ptr cinfo)); +#ifndef JSIMD_MASKFUNC_NOT_SUPPORTED +EXTERN(unsigned int) jpeg_simd_mask + JPP((j_common_ptr cinfo, unsigned int remove, unsigned int add)); +#endif +#ifndef JSIMD_MODEINFO_NOT_SUPPORTED +EXTERN(unsigned int) jpeg_simd_color_converter JPP((j_compress_ptr cinfo)); +EXTERN(unsigned int) jpeg_simd_downsampler JPP((j_compress_ptr cinfo)); +EXTERN(unsigned int) jpeg_simd_forward_dct JPP((j_compress_ptr cinfo, + int method)); +EXTERN(unsigned int) jpeg_simd_color_deconverter JPP((j_decompress_ptr cinfo)); +EXTERN(unsigned int) jpeg_simd_upsampler JPP((j_decompress_ptr cinfo, + int do_fancy)); +EXTERN(unsigned int) jpeg_simd_inverse_dct JPP((j_decompress_ptr cinfo, + int method)); +#endif /* !JSIMD_MODEINFO_NOT_SUPPORTED */ + /* These marker codes are exported since applications and data source modules * are likely to want to use them. 
@@ -1093,4 +1150,8 @@ struct jpeg_color_quantizer { long dummy; }; #include "jerror.h" /* fetch error codes too */ #endif +#ifdef __cplusplus +} +#endif + #endif /* JPEGLIB_H */ diff --git a/jsimdcpu.asm b/jsimdcpu.asm new file mode 100644 index 0000000..1c851d1 --- /dev/null +++ b/jsimdcpu.asm @@ -0,0 +1,112 @@ +; +; jsimdcpu.asm - SIMD instruction support check +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : August 23, 2005 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 16 + +copyright: + db " x86 SIMD ext for IJG lib V", JPEG_SIMDEXT_VER_STR + db " Copyright 2006, MIYASAKA Masaru " + + alignz 16 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Check if the CPU supports SIMD instructions +; +; GLOBAL(unsigned int) +; jpeg_simd_cpu_support (void) +; + + align 16 + global EXTN(jpeg_simd_cpu_support) + +EXTN(jpeg_simd_cpu_support): + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused + push edi + + xor edi,edi ; simd support flag + + pushfd + pop eax + mov edx,eax + xor eax, 1<<21 ; flip ID bit in EFLAGS + push eax + popfd + pushfd + pop eax + xor eax,edx + jz short .return ; CPUID is not supported + + ; Check for MMX, SSE and SSE2 instruction support + xor eax,eax + cpuid + test eax,eax + jz short .return + + xor eax,eax + inc eax + cpuid + mov eax,edx ; eax = Standard feature flags + + test eax, 1<<23 ; 
bit23:MMX + jz short .no_mmx + or edi, byte JSIMD_MMX +.no_mmx: + test eax, 1<<25 ; bit25:SSE + jz short .no_sse + or edi, byte JSIMD_SSE +.no_sse: + test eax, 1<<26 ; bit26:SSE2 + jz short .no_sse2 + or edi, byte JSIMD_SSE2 +.no_sse2: + + ; Check for 3DNow! instruction support + mov eax, 0x80000000 + cpuid + cmp eax, 0x80000000 + jbe short .return + + mov eax, 0x80000001 + cpuid + mov eax,edx ; eax = Extended feature flags + + test eax, 1<<31 ; bit31:3DNow!(vendor independent) + jz short .no_3dnow + or edi, byte JSIMD_3DNOW +.no_3dnow: + +.return: + mov eax,edi + + pop edi +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + ret + diff --git a/jsimddjg.asm b/jsimddjg.asm new file mode 100644 index 0000000..02c82e4 --- /dev/null +++ b/jsimddjg.asm @@ -0,0 +1,130 @@ +; +; jsimddjg.asm - SIMD instruction support check (for DJGPP V.2) +; +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; Last Modified : September 26, 2004 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Check if the OS supports SIMD instructions (DJGPP V.2) +; +; GLOBAL(unsigned int) +; jpeg_simd_os_support (unsigned int simd) +; + +%define EXCEPTION_ILLEGAL_INSTRUCTION 6 ; vector number of #UD + +%define simd ebp+8 ; unsigned int simd +%define mxcsr ebp-4 ; unsigned int mxcsr = 0x1F80 + + align 16 + global EXTN(jpeg_simd_os_support) + +EXTN(jpeg_simd_os_support): + push ebp + mov ebp,esp + push dword 0x1F80 ; default value of MXCSR register + push ebx + + push DWORD [simd] ; simd_flags - modified from exception_handler + + mov bl, EXCEPTION_ILLEGAL_INSTRUCTION + mov ax, 0x0202 ; Get Processor Exception Handler Vector + int 0x31 ; DPMI function call + push ecx ; selector of old exception handler + push edx ; offset of old exception handler + + mov ecx,cs + mov edx, exception_handler + mov bl, EXCEPTION_ILLEGAL_INSTRUCTION + mov ax, 0x0203 ; Set Processor Exception Handler Vector + int 0x31 ; DPMI function call + + mov eax, DWORD [simd] + + ; If floating point emulation is enabled (CR0.EM = 1), + ; executing an MMX/3DNow! instruction generates invalid + ; opcode exception (#UD). 
+ + push byte (.mmx_1 - .mmx_0) ; inst_bytes + push byte (JSIMD_MMX | JSIMD_3DNOW) ; test_flags + test eax, DWORD [esp] + jz short .mmx_1 +.mmx_0: emms ; executing MMX instruction +.mmx_1: add esp, byte 8 + + push byte (.sse_1 - .sse_0) + push byte (JSIMD_SSE | JSIMD_SSE2) + test eax, DWORD [esp] + jz short .sse_1 +.sse_0: ldmxcsr DWORD [mxcsr] ; executing SSE instruction +.sse_1: add esp, byte 8 + + pop edx ; offset of old exception handler + pop ecx ; selector of old exception handler + mov bl, EXCEPTION_ILLEGAL_INSTRUCTION + mov ax, 0x0203 ; Set Processor Exception Handler Vector + int 0x31 ; DPMI function call + + pop eax ; return simd_flags + and eax, byte JSIMD_ALL + + pop ebx + mov esp,ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; LOCAL(void) far +; exception_handler (unsigned long error_code, +; void * context_eip, unsigned short context_cs, +; unsigned long context_eflags, +; void * context_esp, unsigned short context_ss); +; + +%define error_code esp+12+8 ; unsigned long error_code +%define context_eip esp+12+12 ; void * context_eip +%define context_cs esp+12+16 ; unsigned short context_cs +%define context_eflags esp+12+20 ; unsigned long context_eflags +%define context_esp esp+12+24 ; void * context_esp +%define context_ss esp+12+28 ; unsigned short context_ss + +%define test_flags(b) (b)+0 +%define inst_bytes(b) (b)+4 +%define simd_flags(b) (b)+16 + + align 16 + +exception_handler: + push eax + push ecx + push edx + + mov eax, POINTER [context_esp] + mov ecx, DWORD [test_flags(eax)] + mov edx, DWORD [inst_bytes(eax)] + not ecx + add POINTER [context_eip], edx ; next instruction + and DWORD [simd_flags(eax)], ecx ; turn off flag + + pop edx + pop ecx + pop eax + retf + diff --git a/jsimdext.inc b/jsimdext.inc new file mode 100644 index 0000000..a502c07 --- /dev/null +++ b/jsimdext.inc @@ -0,0 +1,347 @@ +; +; jsimdext.inc - common declarations +; +; x86 SIMD extension for IJG JPEG library - 
version 1.02 +; +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. +; +; Last Modified : February 4, 2006 +; +; [TAB8] + +%ifndef JSIMDCFG_INCLUDED ; in case jsimdcfg.inc already did +%include "jsimdcfg.inc" ; configuration declarations +%endif + +; ========================================================================== +; System-dependent configurations + +%ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- +; * Microsoft Visual C++ +; * MinGW (Minimalist GNU for Windows) +; * CygWin +; * LCC-Win32 + +; -- segment definition -- +; +%define SEG_TEXT .text align=16 public use32 class=CODE +%define SEG_CONST .rdata align=16 public use32 class=CONST + +%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- +; * Borland C++ (Win32) + +; -- segment definition -- +; +%define SEG_TEXT .text align=16 public use32 class=CODE +%define SEG_CONST .data align=16 public use32 class=DATA + +%elifdef ELF ; ----(nasm -felf -DELF ...)------------ +; * Linux +; * *BSD family Unix using elf format +; * Unix System V, including Solaris x86, UnixWare and SCO Unix + +; -- segment definition -- +; +%define SEG_TEXT .text progbits alloc exec nowrite 
align=16 +%define SEG_CONST .rodata progbits alloc noexec nowrite align=16 + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC +%define EXTN(name) name ; foo() -> foo + +%elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- +; * Older Linux using a.out format (nasm -f aout -DAOUT ...) +; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC + +%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- +; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) + +; -- segment definition -- +; +%define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why? +%define SEG_CONST .rodata align=16 + +; The generation of position-independent code (PIC) is the default on Darwin. +; +%define PIC +%define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing + +%else ; ----(Other case)---------------------- + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +%endif ; ---------------------------------------------- + +; ========================================================================== + +; ---- jpeglib.h ----------------------------------------------------------- + +%define DCTSIZE 8 ; The basic DCT block is 8x8 samples +%define DCTSIZE2 64 ; DCTSIZE squared; # of elements in a block + +%define JSIMD_NONE 0x00 ; bitflags for jpeg_simd_*_support() +%define JSIMD_MMX 0x01 +%define JSIMD_3DNOW 0x02 +%define JSIMD_SSE 0x04 +%define JSIMD_SSE2 0x08 +%define JSIMD_ALL (JSIMD_MMX | JSIMD_3DNOW | JSIMD_SSE | JSIMD_SSE2) + +; ---- jpegint.h ----------------------------------------------------------- + +; Short forms of external names for systems with brain-damaged linkers. 
+;
+%ifdef NEED_SHORT_EXTERNAL_NAMES
+%define jpeg_simd_cpu_support	jSiCpuSupport
+%define jpeg_simd_os_support	jSiOsSupport
+%endif ; NEED_SHORT_EXTERNAL_NAMES
+
+; ---- jmorecfg.h ----------------------------------------------------------
+;
+; BITS_IN_JSAMPLE==8 (8-bit sample values) is the only valid setting
+; on this SIMD implementation.
+;
+%define BITS_IN_JSAMPLE	8	; Caution: Cannot be changed
+
+; Representation of a single sample (pixel element value).
+; On this SIMD implementation, this must be 'unsigned char'.
+;
+%define JSAMPLE		byte		; unsigned char
+%define SIZEOF_JSAMPLE	SIZEOF_BYTE	; sizeof(JSAMPLE)
+%define MAXJSAMPLE	255		; largest 8-bit sample value
+%define CENTERJSAMPLE	128		; midpoint of the 8-bit sample range
+
+; Representation of a DCT frequency coefficient.
+; On this SIMD implementation, this must be 'short'.
+;
+%define JCOEF		word		; short
+%define SIZEOF_JCOEF	SIZEOF_WORD	; sizeof(JCOEF)
+
+; INT32 must hold at least signed 32-bit values.
+; On this SIMD implementation, this must be 'long'.
+;
+%define INT32		dword		; long
+%define SIZEOF_INT32	SIZEOF_DWORD	; sizeof(INT32)
+
+; Datatype used for image dimensions.
+; On this SIMD implementation, this must be 'unsigned int'.
+;
+%define JDIMENSION		dword		; unsigned int
+%define SIZEOF_JDIMENSION	SIZEOF_DWORD	; sizeof(JDIMENSION)
+
+; --------------------------------------------------------------------------
+
+%define JSAMPROW		POINTER		; JSAMPLE FAR * (jpeglib.h)
+%define JSAMPARRAY		POINTER		; JSAMPROW * (jpeglib.h)
+%define JSAMPIMAGE		POINTER		; JSAMPARRAY * (jpeglib.h)
+%define JCOEFPTR		POINTER		; JCOEF FAR * (jpeglib.h)
+%define SIZEOF_JSAMPROW		SIZEOF_POINTER	; sizeof(JSAMPROW)
+%define SIZEOF_JSAMPARRAY	SIZEOF_POINTER	; sizeof(JSAMPARRAY)
+%define SIZEOF_JSAMPIMAGE	SIZEOF_POINTER	; sizeof(JSAMPIMAGE)
+%define SIZEOF_JCOEFPTR		SIZEOF_POINTER	; sizeof(JCOEFPTR)
+
+%define POINTER			dword		; general pointer type
+%define SIZEOF_POINTER		SIZEOF_DWORD	; sizeof(POINTER)
+%define POINTER_BIT		DWORD_BIT	; sizeof(POINTER)*BYTE_BIT
+
+%define INT			dword		; signed integer type
+%define SIZEOF_INT		SIZEOF_DWORD	; sizeof(INT)
+%define INT_BIT			DWORD_BIT	; sizeof(INT)*BYTE_BIT
+
+%define FP32			dword		; IEEE754 single
+%define SIZEOF_FP32		SIZEOF_DWORD	; sizeof(FP32)
+%define FP32_BIT		DWORD_BIT	; sizeof(FP32)*BYTE_BIT
+
+%define FP64			qword		; IEEE754 double
+%define SIZEOF_FP64		SIZEOF_QWORD	; sizeof(FP64)
+%define FP64_BIT		QWORD_BIT	; sizeof(FP64)*BYTE_BIT
+
+%define FP80			tword		; IEEE754 double-extended(x86)
+%define SIZEOF_FP80		SIZEOF_TWORD	; sizeof(FP80)
+%define FP80_BIT		TWORD_BIT	; sizeof(FP80)*BYTE_BIT
+
+%define MMWORD			qword		; int64 (MMX register)
+%define SIZEOF_MMWORD		SIZEOF_QWORD	; sizeof(MMWORD)
+%define MMWORD_BIT		QWORD_BIT	; sizeof(MMWORD)*BYTE_BIT
+
+%define XMMWORD			dqword		; int128 (SSE register)
+%define SIZEOF_XMMWORD		SIZEOF_DQWORD	; sizeof(XMMWORD)
+%define XMMWORD_BIT		DQWORD_BIT	; sizeof(XMMWORD)*BYTE_BIT
+
+%define SIZEOF_BYTE		1		; sizeof(BYTE)
+%define SIZEOF_WORD		2		; sizeof(WORD)
+%define SIZEOF_DWORD		4		; sizeof(DWORD)
+%define SIZEOF_QWORD		8		; sizeof(QWORD)
+%define SIZEOF_TBYTE		10		; sizeof(TBYTE)
+%define SIZEOF_TWORD		10		; sizeof(TWORD)
+%define SIZEOF_DQWORD		16		; sizeof(DQWORD)
+
+%define BYTE_BIT		8		; CHAR_BIT in C
+%define WORD_BIT		16		; sizeof(WORD)*BYTE_BIT
+%define DWORD_BIT		32		; sizeof(DWORD)*BYTE_BIT
+%define QWORD_BIT		64		; sizeof(QWORD)*BYTE_BIT
+%define TBYTE_BIT		80		; sizeof(TBYTE)*BYTE_BIT
+%define TWORD_BIT		80		; sizeof(TWORD)*BYTE_BIT
+%define DQWORD_BIT		128		; sizeof(DQWORD)*BYTE_BIT
+
+%idefine TBYTE		TWORD	; NASM uses the keyword 'TWORD' instead of 'TBYTE'
+%idefine DQWORD			; currently not supported by NASM
+%idefine _MMWORD		; defined empty: expands to nothing
+%idefine _DWORD			; defined empty: expands to nothing
+
+; --------------------------------------------------------------------------
+; External Symbol Name
+; (default: prepend an underscore, unless EXTN has already been defined)
+%ifndef EXTN
+%define EXTN(name)	_ %+ name	; foo() -> _foo
+%endif
+
+; --------------------------------------------------------------------------
+; Macros for position-independent code (PIC) support
+; (PIC is honoured only when a GOT_SYMBOL has been defined)
+%ifndef GOT_SYMBOL
+%undef PIC
+%endif
+
+%ifdef PIC ; -------------------------------------------
+
+%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
+
+; At present, nasm doesn't seem to support PIC generation for Mach-O.
+; The PIC support code below is a little tricky.
+
+	SECTION	SEG_CONST
+const_base:
+
+%define GOTOFF(got,sym)	(got) + (sym) - const_base
+
+%imacro get_GOT 1
+	; NOTE: this macro destroys ecx register.
+	call	%%geteip
+	add	ecx, byte (%%ref - $)
+	jmp	short %%adjust
+%%geteip:
+	mov	ecx, POINTER [esp]
+	ret
+%%adjust:
+	push	ebp
+	xor	ebp,ebp		; ebp = 0 (index register of the hand-encoded lea)
+%ifidni %1,ebx	; (%1 == ebx)
+	; db 0x8D,0x9C + jmp near const_base =
+	;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
+	db	0x8D,0x9C		; 8D,9C
+	jmp	near const_base		; E9,(const_base-%%ref)
+%%ref:
+%else	; (%1 != ebx)
+	; db 0x8D,0x8C + jmp near const_base =
+	;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
+	db	0x8D,0x8C		; 8D,8C
+	jmp	near const_base		; E9,(const_base-%%ref)
+%%ref:	mov	%1, ecx
+%endif	; (%1 == ebx)
+	pop	ebp
+%endmacro
+
+%else ; GOT_SYMBOL != _MACHO_PIC_ ----------------
+
+%define GOTOFF(got,sym)	(got) + (sym) wrt ..gotoff
+
+%imacro get_GOT 1
+	extern	GOT_SYMBOL
+	call	%%geteip
+	add	%1, GOT_SYMBOL + $$ - $ wrt ..gotpc
+	jmp	short %%done
+%%geteip:
+	mov	%1, POINTER [esp]
+	ret
+%%done:
+%endmacro
+
+%endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
+
+%imacro pushpic 1.nolist
+	push	%1
+%endmacro
+%imacro poppic 1.nolist
+	pop	%1
+%endmacro
+%imacro movpic 2.nolist
+	mov	%1,%2
+%endmacro
+
+%else ; !PIC -----------------------------------------
+
+%define GOTOFF(got,sym)	(sym)	; the PIC helper macros below expand to nothing
+
+%imacro get_GOT 1.nolist
+%endmacro
+%imacro pushpic 1.nolist
+%endmacro
+%imacro poppic 1.nolist
+%endmacro
+%imacro movpic 2.nolist
+%endmacro
+
+%endif ; PIC -----------------------------------------
+
+; --------------------------------------------------------------------------
+; Align the next instruction on {2,4,8,16,..}-byte boundary.
+; ".balign n,,m" in GNU as
+;
+%define MSKLE(x,y)	(~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
+%define FILLB(b,n)	(($$-(b)) & ((n)-1))
+
+%imacro alignx 1-2.nolist 0xFFFF
+%%bs:	times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
+	       db 0x90                               ; nop
+	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
+	       db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
+	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
+	       db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
+	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
+	       db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
+	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
+	       db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
+	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
+	       db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
+	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
+	       db 0x8B,0xED                          ; mov ebp,ebp
+	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
+	       db 0x90                               ; nop
+%endmacro
+
+; Align the next data on {2,4,8,16,..}-byte boundary.
+;
+%imacro alignz 1.nolist
+	align %1, db 0		; filling zeros
+%endmacro
+
+; --------------------------------------------------------------------------
diff --git a/jsimdgcc.c b/jsimdgcc.c
new file mode 100644
index 0000000..d6ad75b
--- /dev/null
+++ b/jsimdgcc.c
@@ -0,0 +1,123 @@
+/*
+ * jsimdgcc.c - SIMD instruction support check (gcc)
+ *
+ * x86 SIMD extension for IJG JPEG library
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * Last Modified : January 24, 2006
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#include <setjmp.h>
+#include <signal.h>
+
+/*
+ * A jump out of a signal handler must also restore the signal mask.
+ * Whether plain longjmp() does so is unspecified; where it does not,
+ * SIGILL may stay blocked after the first trapped probe and the next
+ * illegal instruction can no longer be caught.  Use sigsetjmp() and
+ * siglongjmp() whenever the POSIX signal interface is available.
+ */
+#ifdef SIG_SETMASK		/* <signal.h> provides POSIX signals */
+#define SIMD_JMP_BUF		sigjmp_buf
+#define SIMD_SETJMP(env)	sigsetjmp(env, 1)
+#define SIMD_LONGJMP(env,val)	siglongjmp(env, val)
+#else				/* ANSI C only */
+#define SIMD_JMP_BUF		jmp_buf
+#define SIMD_SETJMP(env)	setjmp(env)
+#define SIMD_LONGJMP(env,val)	longjmp(env, val)
+#endif
+
+static volatile int lockf /* = 0 */;	/* spin lock: nonzero while held */
+static SIMD_JMP_BUF jmpbuf;		/* where exception_handler() jumps to */
+
+
+/*
+ * Exception handler for signal()
+ *
+ * Resets SIGILL to its default action and jumps back to the recovery
+ * point saved in jmpbuf, where SIMD_SETJMP() then returns 1.
+ */
+
+LOCAL(void)
+exception_handler (int sig)
+{
+  signal(SIGILL, SIG_DFL);
+  SIMD_LONGJMP(jmpbuf, 1);
+}
+
+
+/*
+ * Check if the OS supports SIMD instructions
+ *
+ * simd is a set of JSIMD_* bitflags.  An MMX and an SSE instruction are
+ * each executed with a SIGILL handler installed; the flags of a group
+ * whose probe raises SIGILL are cleared.  Returns the remaining flags
+ * (simd itself when not compiled with gcc).
+ */
+
+GLOBAL(unsigned int)
+jpeg_simd_os_support (unsigned int simd)
+{
+#ifdef __GNUC__	/* gcc (i386) */
+  unsigned int mxcsr = 0x1F80;	/* operand for the ldmxcsr probe */
+
+  /* enter critical section: jmpbuf and the SIGILL handler are shared.
+   * Numeric local labels keep the asm free of duplicate symbols if the
+   * compiler emits this block more than once, and the "memory" clobber
+   * stops memory accesses from being moved across the lock.
+   */
+  __asm__ __volatile__ (
+    "1:                      \n\t"
+    "movl    $1,%%eax        \n\t"
+    "xchgl   %0,%%eax        \n\t"	/* try to get lock */
+    "cmpl    $0,%%eax        \n\t"	/* test if successful */
+    "je      3f              \n"
+    "2:                      \n\t"
+    /*".byte   0xF3,0x90       \n\t"*/	/* "pause" on P4 (short delay) */
+    "cmpl    $0,%0           \n\t"	/* check if lock is free */
+    "jne     2b              \n\t"
+    "jmp     1b              \n"
+    "3:                      \n\t"
+    : "=m" (lockf) : "m" (lockf) : "%eax", "memory"
+  );
+
+  /* If floating point emulation is enabled (CR0.EM = 1),
+   * executing an MMX/3DNow! instruction generates invalid
+   * opcode exception (#UD).
+   */
+  if (simd & (JSIMD_MMX | JSIMD_3DNOW)) {
+    if (!SIMD_SETJMP(jmpbuf)) {
+      signal(SIGILL, exception_handler);
+      __asm__ __volatile__ (
+        ".byte 0x0F,0x77"		/* emms */
+      );
+      signal(SIGILL, SIG_DFL);
+    } else {
+      simd &= ~(JSIMD_MMX | JSIMD_3DNOW);
+    }
+  }
+  if (simd & (JSIMD_SSE | JSIMD_SSE2)) {
+    if (!SIMD_SETJMP(jmpbuf)) {
+      signal(SIGILL, exception_handler);
+      __asm__ __volatile__ (
+        "leal    %0,%%eax        \n\t"
+        ".byte 0x0F,0xAE,0x10    \n\t"	/* ldmxcsr [eax] */
+        : : "m" (mxcsr) : "%eax"
+      );
+      signal(SIGILL, SIG_DFL);
+    } else {
+      simd &= ~(JSIMD_SSE | JSIMD_SSE2);
+    }
+  }
+
+  /* leave critical section */
+  lockf = 0;			/* release lock */
+#endif /* __GNUC__ */
+
+  return simd;
+}
diff --git a/jsimdw32.asm b/jsimdw32.asm
new file mode 100644
index 0000000..7f2cdbc
--- /dev/null
+++ b/jsimdw32.asm
@@ -0,0 +1,121 @@
+;
+; jsimdw32.asm - SIMD instruction support check (for Win32)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; Last Modified : September 26, 2004
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+;
+; Check if the OS supports SIMD instructions (Win32)
+;
+; Reference: "Win32 Exception handling for assembler programmers"
+; http://www.jorgon.freeserve.co.uk/Except/Except.htm
+;
+; GLOBAL(unsigned int)
+; jpeg_simd_os_support (unsigned int simd)
+; (returns simd with the flags of every faulting probe instruction cleared)
+
+%define simd	ebp+8		; unsigned int simd
+%define mxcsr	ebp-4		; unsigned int mxcsr = 0x1F80
+
+	align	16
+	global	EXTN(jpeg_simd_os_support)
+
+EXTN(jpeg_simd_os_support):
+	push	ebp
+	mov	ebp,esp
+	push	dword 0x1F80		; default value of MXCSR register
+	push	exception_handler	; handler field of this record
+	push	POINTER [fs:0]		; prev_record_ptr
+	mov	POINTER [fs:0], esp	; this_record_ptr
+
+	mov	eax, DWORD [simd]
+	and	eax, byte JSIMD_ALL	; keep only the JSIMD_* flag bits
+	xor	ecx,ecx			; the handler reads all of ecx ...
+	xor	edx,edx			; ... and all of edx
+
+	; If floating point emulation is enabled (CR0.EM = 1),
+	; executing an MMX/3DNow! instruction generates invalid
+	; opcode exception (#UD).
+
+	mov	cl, (JSIMD_MMX | JSIMD_3DNOW)	; flags the handler clears in eax
+	mov	dl, (.mmx_1 - .mmx_0)		; probe length the handler adds to Eip
+	test	al,cl
+	jz	short .mmx_1			; none of these flags requested
+.mmx_0:	emms				; executing MMX instruction
+.mmx_1:
+	mov	cl, (JSIMD_SSE | JSIMD_SSE2)	; flags the handler clears in eax
+	mov	dl, (.sse_1 - .sse_0)		; probe length the handler adds to Eip
+	test	al,cl
+	jz	short .sse_1			; none of these flags requested
+.sse_0:	ldmxcsr	DWORD [mxcsr]		; executing SSE instruction
+.sse_1:
+
+	pop	POINTER [fs:0]		; prev_record_ptr
+	mov	esp,ebp			; drops the handler pointer and mxcsr
+	pop	ebp
+	ret				; eax = flags whose probes did not fault
+
+; --------------------------------------------------------------------------
+;
+; LOCAL(EXCEPTION_DISPOSITION)
+; exception_handler (struct _EXCEPTION_RECORD * ExceptionRecord,
+;	void * EstablisherFrame, struct _CONTEXT * ContextRecord,
+;	void * DispatcherContext);
+; (on a continuable #UD: skips the probe and clears its flags in the saved eax)
+
+%define ExceptionContinueExecution	0	; from <excpt.h>:
+%define ExceptionContinueSearch		1	; typedef enum _EXCEPTION_DISPOSITION {
+%define ExceptionNestedException	2	; ...
+%define ExceptionCollidedUnwind		3	; } EXCEPTION_DISPOSITION
+
+%define EXCEPTION_ILLEGAL_INSTRUCTION	0xC000001D	; from <winbase.h>
+
+%define ExceptionRecord		esp+4	; struct _EXCEPTION_RECORD *
+%define EstablisherFrame	esp+8	; void * EstablisherFrame
+%define ContextRecord		esp+12	; struct _CONTEXT * ContextRecord
+%define DispatcherContext	esp+16	; void * DispatcherContext
+
+%define ExceptionCode(b)	(b)+0	; ExceptionRecord->ExceptionCode
+%define ExceptionFlags(b)	(b)+4	; ExceptionRecord->ExceptionFlags
+%define Context_Edx(b)		(b)+168	; ContextRecord->Edx
+%define Context_Ecx(b)		(b)+172	; ContextRecord->Ecx
+%define Context_Eax(b)		(b)+176	; ContextRecord->Eax
+%define Context_Eip(b)		(b)+184	; ContextRecord->Eip
+
+	align	16
+
+exception_handler:
+	mov	edx, POINTER [ExceptionRecord]
+	mov	eax, ExceptionContinueSearch	; result unless handled below
+
+	cmp	DWORD [ExceptionFlags(edx)], byte 0
+	jne	short .return		; noncontinuable exception
+	cmp	DWORD [ExceptionCode(edx)], EXCEPTION_ILLEGAL_INSTRUCTION
+	jne	short .return		; not a #UD exception
+
+	mov	eax, POINTER [ContextRecord]
+	mov	ecx, DWORD [Context_Ecx(eax)]	; flags of the faulting probe
+	mov	edx, DWORD [Context_Edx(eax)]	; length of the faulting probe
+	not	ecx				; mask that keeps all other flags
+	add	DWORD [Context_Eip(eax)], edx	; next instruction
+	and	DWORD [Context_Eax(eax)], ecx	; turn off flag
+	mov	eax, ExceptionContinueExecution
+.return:
+	ret
+
diff --git a/libjpeg.spec b/libjpeg.spec
new file mode 100644
index 0000000..2c9c224
--- /dev/null
+++ b/libjpeg.spec
@@ -0,0 +1,234 @@
+%define LIBVER 62.1.0
+Summary: A library for manipulating JPEG image format files (with SIMD support)
+Summary(ja): JPEG ·Á¼°²èÁü¥Õ¥¡¥¤¥ë¤ò°·¤¦°Ù¤Î¥é¥¤¥Ö¥é¥ê (x86 SIMD ÂбþÈÇ)
+Name: libjpeg
+Version: 6bx1.02
+Release: 1
+License: distributable
+Group: System Environment/Libraries
+Source0: http://cetus.sakura.ne.jp/softlab/jpeg-x86simd/sources/jpegsrc-6b-x86simd-1.02.tar.bz2
+Buildroot: %{_tmppath}/%{name}-%{version}-root
+ExclusiveArch: %{ix86}
+BuildPrereq: nasm >= 0.98.25
+
+%package devel
+Summary: Development tools for programs which will use the
libjpeg library. +Summary(ja): libjpeg ¥é¥¤¥Ö¥é¥ê¤ò»È¤¦¥×¥í¥°¥é¥à¸þ¤±³«È¯¥Ä¡¼¥ë +Group: Development/Libraries +Requires: libjpeg = %{version}-%{release} + +%description +The libjpeg package contains a library of functions for manipulating +JPEG images, as well as simple client programs for accessing the +libjpeg functions. Libjpeg client programs include cjpeg, djpeg, +jpegtran, rdjpgcom and wrjpgcom. Cjpeg compresses an image file into +JPEG format. Djpeg decompresses a JPEG file into a regular image +file. Jpegtran can perform various useful transformations on JPEG +files. Rdjpgcom displays any text comments included in a JPEG file. +Wrjpgcom inserts text comments into a JPEG file. + +The libjpeg library in this package uses SIMD instructions if available. +On a processor that supports SIMD instructions (MMX, SSE, etc), +it runs 2-3 times faster than the original version of libjpeg. + +%description -l ja +libjpeg ¥Ñ¥Ã¥±¡¼¥¸¤Ë¤Ï JPEG ²èÁü¤ò°·¤¦°Ù¤ËɬÍפʥ饤¥Ö¥é¥ê¤È¡¤ +libjpeg ´Ø¿ô¤Ë¥¢¥¯¥»¥¹¤¹¤ë°Ù¤Î´Êñ¤Ê¥¯¥é¥¤¥¢¥ó¥È¥×¥í¥°¥é¥à¤¬ +¼ý¤á¤é¤ì¤Æ¤¤¤Þ¤¹¡¥libjpeg ¥¯¥é¥¤¥¢¥ó¥È¥×¥í¥°¥é¥à¤Ë¤Ï cjpeg, djpeg, +jpegtran, rdjpgcom, wrjpgcom ¤¬¤¢¤ê¤Þ¤¹¡¥cjpeg ¤Ï²èÁü¥Õ¥¡¥¤¥ë¤ò +JPEG ·Á¼°¤Ë°µ½Ì¤·¤Þ¤¹¡¥djpeg ¤Ï JPEG ¥Õ¥¡¥¤¥ë¤òÄ̾ï¤Î²èÁü¥Õ¥¡¥¤¥ë¤Ë +Ÿ³«¤·¤Þ¤¹¡¥jpegtran ¤Ï JPEG ¥Õ¥¡¥¤¥ë¤ËÍÍ¡¹¤ÊÊÑ´¹¤ò»Ü¤¹¤³¤È¤¬½ÐÍè¤Þ¤¹¡¥ +rdjpgcom ¤Ï JPEG ¥Õ¥¡¥¤¥ë¤Ë´Þ¤Þ¤ì¤Æ¤¤¤ë¥Æ¥­¥¹¥È·Á¼°¤Î¥³¥á¥ó¥È¤òɽ¼¨¤·¡¤ +wrjpgcom ¤Ï JPEG ¥Õ¥¡¥¤¥ë¤Ë¥Æ¥­¥¹¥È·Á¼°¤Î¥³¥á¥ó¥È¤òÄɲä·¤Þ¤¹¡¥ + +¤³¤Î¥Ñ¥Ã¥±¡¼¥¸¤Ë¼ý¤á¤é¤ì¤Æ¤¤¤ë libjpeg ¥é¥¤¥Ö¥é¥ê¤Ï¡¢x86 SIMD ÂбþÈǤǤ¹¡£ +MMX ¤ä SSE ¤Ê¤É¤Î SIMD ±é»»µ¡Ç½¤òÁõÈ÷¤·¤Æ¤¤¤ë¥×¥í¥»¥Ã¥µ¾å¤Çưºî¤µ¤»¤ë¤È¡¢ +¥ª¥ê¥¸¥Ê¥ëÈǤΠlibjpeg ¥é¥¤¥Ö¥é¥ê¤ÈÈæ³Ó¤·¤Æ 2¡Á3ÇÜÄøÅ٤ήÅÙ¤ÇÆ°ºî¤·¤Þ¤¹¡£ + +%description devel +The libjpeg-devel package includes the header files and static libraries +necessary for developing programs which will manipulate JPEG files using +the libjpeg library. + +If you are going to develop programs which will manipulate JPEG images, +you should install libjpeg-devel. 
You'll also need to have the libjpeg +package installed. + +%description devel -l ja +libjpeg-devel ¥Ñ¥Ã¥±¡¼¥¸¤Ë¤Ï¡¤libjpeg ¥é¥¤¥Ö¥é¥ê¤ò»È¤Ã¤Æ JPEG ¥Õ¥¡¥¤¥ë¤ò +°·¤¦¥×¥í¥°¥é¥à¤ò³«È¯¤¹¤ë¤Î¤ËɬÍפʥإåÀ¥Õ¥¡¥¤¥ë¤È¥¹¥¿¥Æ¥£¥Ã¥¯¥é¥¤¥Ö¥é¥ê¤¬ +¼ý¤á¤é¤ì¤Æ¤¤¤Þ¤¹¡¥ + +JPEG ²èÁü¤ò°·¤¦¥×¥í¥°¥é¥à¤ò³«È¯¤¹¤ëºÝ¤Ë¤Ï¡¤libjpeg-devel ¤ò +¥¤¥ó¥¹¥È¡¼¥ë¤·¤Æ²¼¤µ¤¤¡¥Æ±»þ¤Ë libjpeg ¥Ñ¥Ã¥±¡¼¥¸¤â¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë +ɬÍפ¬¤¢¤ê¤Þ¤¹¡¥ + +%prep +%setup -q -n jpeg-6bx +# suppress "libtoolize --copy --force" +mv configure.in configure.in_ + +%build +%configure --enable-shared --enable-static + +make libdir=%{_libdir} %{?_smp_mflags} +LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD make test + +%install +rm -rf $RPM_BUILD_ROOT + +%makeinstall +#strip -R .comment $RPM_BUILD_ROOT/usr/bin/* || : +#/sbin/ldconfig -n $RPM_BUILD_ROOT/%{_libdir} + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%doc usage.doc wizard.doc README +%{_libdir}/libjpeg.so.* +%{_bindir}/* +%{_mandir}/*/* + +%files devel +%defattr(-,root,root) +%doc libjpeg.doc coderules.doc structure.doc example.c +%doc simd_*.txt +%{_libdir}/*.a +%{_libdir}/*.la +%{_libdir}/*.so +/usr/include/*.h + +%changelog +* Sat Feb 04 2006 MIYASAKA Masaru - 6bx1.02-1 +- upgraded to 6bx1.02 + +* Thu Jan 26 2006 MIYASAKA Masaru - 6bx1.01-1 +- upgraded to 6bx1.01 + +* Thu Mar 24 2005 MIYASAKA Masaru - 6bx1.0-1 +- based on 6b-33 from Fedora Core 3 and modified for SIMD-extended libjpeg +- added Japanese summary and description, which is delivered from Vine Linux +- moved wizard.doc to main package + +* Thu Oct 7 2004 Matthias Clasen - 6b-33 +- Add URL. 
(#134791) + +* Tue Jun 15 2004 Elliot Lee +- rebuilt + +* Tue Mar 02 2004 Elliot Lee +- rebuilt + +* Fri Feb 13 2004 Elliot Lee +- rebuilt + +* Thu Sep 25 2003 Jeremy Katz 6b-30 +- rebuild to fix gzipped file md5sums (#91211) + +* Tue Sep 23 2003 Florian La Roche +- do not set rpath + +* Wed Jun 04 2003 Elliot Lee +- rebuilt + +* Thu Feb 13 2003 Elliot Lee 6b-27 +- Add libjpeg-shared.patch to fix shlibs on powerpc + +* Tue Feb 04 2003 Florian La Roche +- add symlink to shared lib + +* Wed Jan 22 2003 Tim Powers +- rebuilt + +* Mon Jan 6 2003 Jonathan Blandford +- add docs, #76508 + +* Fri Dec 13 2002 Elliot Lee 6b-23 +- Merge in multilib changes +- _smp_mflags + +* Tue Sep 10 2002 Than Ngo 6b-22 +- use %%_libdir + +* Fri Jun 21 2002 Tim Powers +- automated rebuild + +* Thu May 23 2002 Tim Powers +- automated rebuild + +* Thu Jan 31 2002 Bernhard Rosenkraenzer 6b-19 +- Fix bug #59011 + +* Mon Jan 28 2002 Bernhard Rosenkraenzer 6b-18 +- Fix bug #58982 + +* Wed Jan 09 2002 Tim Powers +- automated rebuild + +* Tue Jul 24 2001 Bill Nottingham +- require libjpeg = %%{version} + +* Sun Jun 24 2001 Elliot Lee +- Bump release + rebuild. 
+ +* Mon Dec 11 2000 Than Ngo +- rebuilt with the fixed fileutils +- use %%{_tmppath} + +* Wed Nov 8 2000 Bernhard Rosenkraenzer +- fix a typo (strip -R .comment, not .comments) + +* Thu Jul 13 2000 Prospector +- automatic rebuild + +* Sat Jun 17 2000 Bernhard Rosenkraenzer +- FHSify +- add some C++ tweaks to the headers as suggested by bug #9822) + +* Wed May 5 2000 Bill Nottingham +- configure tweaks for ia64; remove alpha patch (it's pointless) + +* Sat Feb 5 2000 Bernhard Rosenkräîzer +- rebuild to get compressed man pages +- fix description +- some minor tweaks to the spec file +- add docs +- fix build on alpha (alphaev6 stuff) + +* Sun Mar 21 1999 Cristian Gafton +- auto rebuild in the new build environment (release 9) + +* Wed Jan 13 1999 Cristian Gafton +- patch to build on arm +- build for glibc 2.1 + +* Mon Oct 12 1998 Cristian Gafton +- strip binaries + +* Mon Aug 3 1998 Jeff Johnson +- fix buildroot problem. + +* Tue Jun 09 1998 Prospector System +- translations modified for de + +* Thu Jun 04 1998 Marc Ewing +- up to release 4 +- remove patch that set (improper) soname - libjpeg now does it itself + +* Thu May 07 1998 Prospector System +- translations modified for de, fr, tr + +* Fri May 01 1998 Cristian Gafton +- fixed build on manhattan + +* Wed Apr 08 1998 Cristian Gafton +- upgraded to version 6b + +* Wed Oct 08 1997 Donnie Barnes +- new package to remove jpeg stuff from libgr and put in it's own package diff --git a/ltconfig b/ltconfig deleted file mode 100755 index 2347e69..0000000 --- a/ltconfig +++ /dev/null @@ -1,1512 +0,0 @@ -#! /bin/sh - -# ltconfig - Create a system-specific libtool. -# Copyright (C) 1996-1998 Free Software Foundation, Inc. -# Gordon Matzigkeit , 1996 -# -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# A lot of this script is taken from autoconf-2.10. - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -if test "${CDPATH+set}" = set; then CDPATH=; export CDPATH; fi - -echo=echo -if test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then : -else - # The Solaris and AIX default echo program unquotes backslashes. - # This makes it impossible to quote backslashes using - # echo "$something" | sed 's/\\/\\\\/g' - # So, we emulate echo with printf '%s\n' - echo="printf %s\\n" - if test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then : - else - # Oops. We have no working printf. Try to find a not-so-buggy echo. - echo=echo - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}:" - for dir in $PATH /usr/ucb; do - if test -f $dir/echo && test "X`$dir/echo '\t'`" = 'X\t'; then - echo="$dir/echo" - break - fi - done - IFS="$save_ifs" - fi -fi - -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -Xsed='sed -e s/^X//' -sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g' - -# Same as above, but do not quote variable references. -double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g' - -# The name of this program. 
-progname=`$echo "X$0" | $Xsed -e 's%^.*/%%'` - -# Constants: -PROGRAM=ltconfig -PACKAGE=libtool -VERSION=1.2 -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c 1>&5' -ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.c $LIBS 1>&5' -rm="rm -f" - -help="Try \`$progname --help' for more information." - -# Global variables: -can_build_shared=yes -enable_shared=yes -# All known linkers require a `.a' archive for static linking. -enable_static=yes -ltmain= -silent= -srcdir= -ac_config_guess= -ac_config_sub= -host= -nonopt= -verify_host=yes -with_gcc=no -with_gnu_ld=no - -old_AR="$AR" -old_CC="$CC" -old_CFLAGS="$CFLAGS" -old_CPPFLAGS="$CPPFLAGS" -old_LD="$LD" -old_LN_S="$LN_S" -old_NM="$NM" -old_RANLIB="$RANLIB" - -# Parse the command line options. -args= -prev= -for option -do - case "$option" in - -*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) optarg= ;; - esac - - # If the previous option needs an argument, assign it. - if test -n "$prev"; then - eval "$prev=\$option" - prev= - continue - fi - - case "$option" in - --help) cat <&2 - echo "$help" 1>&2 - exit 1 - ;; - - *) - if test -z "$ltmain"; then - ltmain="$option" - elif test -z "$host"; then -# This generates an unnecessary warning for sparc-sun-solaris4.1.3_U1 -# if test -n "`echo $option| sed 's/[-a-z0-9.]//g'`"; then -# echo "$progname: warning \`$option' is not a valid host type" 1>&2 -# fi - host="$option" - else - echo "$progname: too many arguments" 1>&2 - echo "$help" 1>&2 - exit 1 - fi ;; - esac -done - -if test -z "$ltmain"; then - echo "$progname: you must specify a LTMAIN file" 1>&2 - echo "$help" 1>&2 - exit 1 -fi - -if test -f "$ltmain"; then : -else - echo "$progname: \`$ltmain' does not exist" 1>&2 - echo "$help" 1>&2 - exit 1 -fi - -# Quote any args containing shell metacharacters. 
-ltconfig_args= -for arg -do - case "$arg" in - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ltconfig_args="$ltconfig_args '$arg'" ;; - *) ltconfig_args="$ltconfig_args $arg" ;; - esac -done - -# A relevant subset of AC_INIT. - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 5 compiler messages saved in config.log -# 6 checking for... messages and results -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>>./config.log - -# NLS nuisances. -# Only set LANG and LC_ALL to C if already set. -# These must not be set unconditionally because not all systems understand -# e.g. LANG=C (notably SCO). -if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi -if test "${LANG+set}" = set; then LANG=C; export LANG; fi - -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. - if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - -if test -z "$srcdir"; then - # Assume the source directory is the same one as the path to ltmain.sh. - srcdir=`$echo "$ltmain" | $Xsed -e 's%/[^/]*$%%'` - test "$srcdir" = "$ltmain" && srcdir=. -fi - -trap "$rm conftest*; exit 1" 1 2 15 -if test "$verify_host" = yes; then - # Check for config.guess and config.sub. - ac_aux_dir= - for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do - if test -f $ac_dir/config.guess; then - ac_aux_dir=$ac_dir - break - fi - done - if test -z "$ac_aux_dir"; then - echo "$progname: cannot find config.guess in $srcdir $srcdir/.. $srcdir/../.." 1>&2 - echo "$help" 1>&2 - exit 1 - fi - ac_config_guess=$ac_aux_dir/config.guess - ac_config_sub=$ac_aux_dir/config.sub - - # Make sure we can run config.sub. 
- if $ac_config_sub sun4 >/dev/null 2>&1; then : - else - echo "$progname: cannot run $ac_config_sub" 1>&2 - echo "$help" 1>&2 - exit 1 - fi - - echo $ac_n "checking host system type""... $ac_c" 1>&6 - - host_alias=$host - case "$host_alias" in - "") - if host_alias=`$ac_config_guess`; then : - else - echo "$progname: cannot guess host type; you must specify one" 1>&2 - echo "$help" 1>&2 - exit 1 - fi ;; - esac - host=`$ac_config_sub $host_alias` - echo "$ac_t$host" 1>&6 - - # Make sure the host verified. - test -z "$host" && exit 1 - -elif test -z "$host"; then - echo "$progname: you must specify a host type if you use \`--no-verify'" 1>&2 - echo "$help" 1>&2 - exit 1 -else - host_alias=$host -fi - -# Transform linux* to *-*-linux-gnu*, to support old configure scripts. -case "$host_os" in -linux-gnu*) ;; -linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'` -esac - -host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` -host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` -host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` - -case "$host_os" in -aix3*) - # AIX sometimes has problems with the GCC collect2 program. For some - # reason, if we set the COLLECT_NAMES environment variable, the problems - # vanish in a puff of smoke. - if test "${COLLECT_NAMES+set}" != set; then - COLLECT_NAMES= - export COLLECT_NAMES - fi - ;; -esac - -# Determine commands to create old-style static archives. -old_archive_cmds='$AR cru $oldlib$oldobjs' -old_postinstall_cmds='chmod 644 $oldlib' -old_postuninstall_cmds= - -# Set a sane default for `AR'. -test -z "$AR" && AR=ar - -# If RANLIB is not set, then run the test. -if test "${RANLIB+set}" != "set"; then - result=no - - echo $ac_n "checking for ranlib... $ac_c" 1>&6 - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}:" - for dir in $PATH; do - test -z "$dir" && dir=. 
- if test -f $dir/ranlib; then - RANLIB="ranlib" - result="ranlib" - break - fi - done - IFS="$save_ifs" - - echo "$ac_t$result" 1>&6 -fi - -if test -n "$RANLIB"; then - old_archive_cmds="$old_archive_cmds;\$RANLIB \$oldlib" - old_postinstall_cmds="\$RANLIB \$oldlib;$old_postinstall_cmds" -fi - -# Check to see if we are using GCC. -if test "$with_gcc" != yes || test -z "$CC"; then - # If CC is not set, then try to find GCC or a usable CC. - if test -z "$CC"; then - echo $ac_n "checking for gcc... $ac_c" 1>&6 - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}:" - for dir in $PATH; do - IFS="$save_ifs" - test -z "$dir" && dir=. - if test -f $dir/gcc; then - CC="gcc" - break - fi - done - IFS="$save_ifs" - - if test -n "$CC"; then - echo "$ac_t$CC" 1>&6 - else - echo "$ac_t"no 1>&6 - fi - fi - - # Not "gcc", so try "cc", rejecting "/usr/ucb/cc". - if test -z "$CC"; then - echo $ac_n "checking for cc... $ac_c" 1>&6 - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}:" - cc_rejected=no - for dir in $PATH; do - test -z "$dir" && dir=. - if test -f $dir/cc; then - if test "$dir/cc" = "/usr/ucb/cc"; then - cc_rejected=yes - continue - fi - CC="cc" - break - fi - done - IFS="$save_ifs" - if test $cc_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. - # However, it has the same name, so the bogon will be chosen - # first if we set CC to just the name; use the full file name. - shift - set dummy "$dir/cc" "$@" - shift - CC="$@" - fi - fi - - if test -n "$CC"; then - echo "$ac_t$CC" 1>&6 - else - echo "$ac_t"no 1>&6 - fi - - if test -z "$CC"; then - echo "$progname: error: no acceptable cc found in \$PATH" 1>&2 - exit 1 - fi - fi - - # Now see if the compiler is really GCC. - with_gcc=no - echo $ac_n "checking whether we are using GNU C... 
$ac_c" 1>&6 - echo "$progname:424: checking whether we are using GNU C" >&5 - - $rm conftest.c - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - with_gcc=yes - fi - $rm conftest.c - echo "$ac_t$with_gcc" 1>&6 -fi - -# Allow CC to be a program name with arguments. -set dummy $CC -compiler="$2" - -echo $ac_n "checking for $compiler option to produce PIC... $ac_c" 1>&6 -pic_flag= -special_shlib_compile_flags= -wl= -link_static_flag= -no_builtin_flag= - -if test "$with_gcc" = yes; then - wl='-Wl,' - link_static_flag='-static' - no_builtin_flag=' -fno-builtin' - - case "$host_os" in - aix3* | aix4* | irix5* | irix6* | osf3* | osf4*) - # PIC is the default for these OSes. - ;; - os2*) - # We can build DLLs from non-PIC. - ;; - amigaos*) - # FIXME: we need at least 68020 code to build shared libraries, but - # adding the `-m68020' flag to GCC prevents building anything better, - # like `-m68040'. - pic_flag='-m68020 -resident32 -malways-restore-a4' - ;; - *) - pic_flag='-fPIC' - ;; - esac -else - # PORTME Check for PIC flags for the system compiler. - case "$host_os" in - aix3* | aix4*) - # All AIX code is PIC. - link_static_flag='-bnso -bI:/lib/syscalls.exp' - ;; - - hpux9* | hpux10*) - # Is there a better link_static_flag that works with the bundled CC? - wl='-Wl,' - link_static_flag="${wl}-a ${wl}archive" - pic_flag='+Z' - ;; - - irix5* | irix6*) - wl='-Wl,' - link_static_flag='-non_shared' - # PIC (with -KPIC) is the default. - ;; - - os2*) - # We can build DLLs from non-PIC. - ;; - - osf3* | osf4*) - # All OSF/1 code is PIC. 
- wl='-Wl,' - link_static_flag='-non_shared' - ;; - - sco3.2v5*) - pic_flag='-Kpic' - link_static_flag='-dn' - special_shlib_compile_flags='-belf' - ;; - - solaris2*) - pic_flag='-KPIC' - link_static_flag='-Bstatic' - wl='-Wl,' - ;; - - sunos4*) - pic_flag='-PIC' - link_static_flag='-Bstatic' - wl='-Qoption ld ' - ;; - - sysv4.2uw2*) - pic_flag='-KPIC' - link_static_flag='-Bstatic' - wl='-Wl,' - ;; - - uts4*) - pic_flag='-pic' - link_static_flag='-Bstatic' - ;; - - *) - can_build_shared=no - ;; - esac -fi - -if test -n "$pic_flag"; then - echo "$ac_t$pic_flag" 1>&6 - - # Check to make sure the pic_flag actually works. - echo $ac_n "checking if $compiler PIC flag $pic_flag works... $ac_c" 1>&6 - $rm conftest* - echo > conftest.c - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $pic_flag -DPIC" - echo "$progname:547: checking if $compiler PIC flag $pic_flag works" >&5 - if { (eval echo $progname:548: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.o; then - # Append any warnings to the config.log. - cat conftest.err 1>&5 - - # On HP-UX, both CC and GCC only warn that PIC is supported... then they - # create non-PIC objects. So, if there were any warnings, we assume that - # PIC is not supported. - if test -s conftest.err; then - echo "$ac_t"no 1>&6 - can_build_shared=no - pic_flag= - else - echo "$ac_t"yes 1>&6 - pic_flag=" $pic_flag" - fi - else - # Append any errors to the config.log. - cat conftest.err 1>&5 - can_build_shared=no - pic_flag= - echo "$ac_t"no 1>&6 - fi - CFLAGS="$save_CFLAGS" - $rm conftest* -else - echo "$ac_t"none 1>&6 -fi - -# Check for any special shared library compilation flags. 
-if test -n "$special_shlib_compile_flags"; then - echo "$progname: warning: \`$CC' requires \`$special_shlib_compile_flags' to build shared libraries" 1>&2 - if echo "$old_CC $old_CFLAGS " | egrep -e "[ ]$special_shlib_compile_flags[ ]" >/dev/null; then : - else - echo "$progname: add \`$special_shlib_compile_flags' to the CC or CFLAGS env variable and reconfigure" 1>&2 - can_build_shared=no - fi -fi - -echo $ac_n "checking if $compiler static flag $link_static_flag works... $ac_c" 1>&6 -$rm conftest* -echo 'main(){return(0);}' > conftest.c -save_LDFLAGS="$LDFLAGS" -LDFLAGS="$LDFLAGS $link_static_flag" -echo "$progname:591: checking if $compiler static flag $link_static_flag works" >&5 -if { (eval echo $progname:592: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then - echo "$ac_t$link_static_flag" 1>&6 -else - echo "$ac_t"none 1>&6 - link_static_flag= -fi -LDFLAGS="$save_LDFLAGS" -$rm conftest* - -if test -z "$LN_S"; then - # Check to see if we can use ln -s, or we need hard links. - echo $ac_n "checking whether ln -s works... $ac_c" 1>&6 - $rm conftestdata - if ln -s X conftestdata 2>/dev/null; then - $rm conftestdata - LN_S="ln -s" - else - LN_S=ln - fi - if test "$LN_S" = "ln -s"; then - echo "$ac_t"yes 1>&6 - else - echo "$ac_t"no 1>&6 - fi -fi - -# Make sure LD is an absolute path. -if test -z "$LD"; then - ac_prog=ld - if test "$with_gcc" = yes; then - # Check if gcc -print-prog-name=ld gives a path. - echo $ac_n "checking for ld used by GCC... $ac_c" 1>&6 - echo "$progname:624: checking for ld used by GCC" >&5 - ac_prog=`($CC -print-prog-name=ld) 2>&5` - case "$ac_prog" in - # Accept absolute paths. - /* | [A-Za-z]:\\*) - test -z "$LD" && LD="$ac_prog" - ;; - "") - # If it fails, then pretend we are not using GCC. - ac_prog=ld - ;; - *) - # If it is relative, then search for the first ld in PATH. - with_gnu_ld=unknown - ;; - esac - elif test "$with_gnu_ld" = yes; then - echo $ac_n "checking for GNU ld... 
$ac_c" 1>&6 - echo "$progname:642: checking for GNU ld" >&5 - else - echo $ac_n "checking for non-GNU ld""... $ac_c" 1>&6 - echo "$progname:645: checking for non-GNU ld" >&5 - fi - - if test -z "$LD"; then - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f "$ac_dir/$ac_prog"; then - LD="$ac_dir/$ac_prog" - # Check to see if the program is GNU ld. I'd rather use --version, - # but apparently some GNU ld's only accept -v. - # Break only if it was the GNU/non-GNU ld that we prefer. - if "$LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then - test "$with_gnu_ld" != no && break - else - test "$with_gnu_ld" != yes && break - fi - fi - done - IFS="$ac_save_ifs" - fi - - if test -n "$LD"; then - echo "$ac_t$LD" 1>&6 - else - echo "$ac_t"no 1>&6 - fi - - if test -z "$LD"; then - echo "$progname: error: no acceptable ld found in \$PATH" 1>&2 - exit 1 - fi -fi - -# Check to see if it really is or is not GNU ld. -echo $ac_n "checking if the linker ($LD) is GNU ld... $ac_c" 1>&6 -# I'd rather use --version here, but apparently some GNU ld's only accept -v. -if $LD -v 2>&1 &5; then - with_gnu_ld=yes -else - with_gnu_ld=no -fi -echo "$ac_t$with_gnu_ld" 1>&6 - -# See if the linker supports building shared libraries. -echo $ac_n "checking whether the linker ($LD) supports shared libraries... $ac_c" 1>&6 - -allow_undefined_flag= -no_undefined_flag= -archive_cmds= -old_archive_from_new_cmds= -export_dynamic_flag_spec= -hardcode_libdir_flag_spec= -hardcode_libdir_separator= -hardcode_direct=no -hardcode_minus_L=no -hardcode_shlibpath_var=unsupported -runpath_var= - -case "$host_os" in -amigaos* | sunos4*) - # On these operating systems, we should treat GNU ld like the system ld. - gnu_ld_acts_native=yes - ;; -*) - gnu_ld_acts_native=no - ;; -esac - -ld_shlibs=yes -if test "$with_gnu_ld" = yes && test "$gnu_ld_acts_native" != yes; then - - # See if GNU ld supports shared libraries. 
- if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then - archive_cmds='$CC -shared ${wl}-soname $wl$soname -o $lib$libobjs' - runpath_var=LD_RUN_PATH - ld_shlibs=yes - else - ld_shlibs=no - fi - - if test "$ld_shlibs" = yes; then - hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir' - export_dynamic_flag_spec='${wl}--export-dynamic' - fi -else - # PORTME fill in a description of your system's linker (not GNU ld) - case "$host_os" in - aix3*) - allow_undefined_flag=unsupported - archive_cmds='$NM$libobjs | $global_symbol_pipe | sed '\''s/.* //'\'' > $lib.exp;$LD -o $objdir/$soname$libobjs -bE:$lib.exp -T512 -H512 -bM:SRE;$AR cru $lib $objdir/$soname' - # Note: this linker hardcodes the directories in LIBPATH if there - # are no directories specified by -L. - hardcode_minus_L=yes - if test "$with_gcc" = yes && test -z "$link_static_flag"; then - # Neither direct hardcoding nor static linking is supported with a - # broken collect2. - hardcode_direct=unsupported - fi - ;; - - aix4*) - allow_undefined_flag=unsupported - archive_cmds='$NM$libobjs | $global_symbol_pipe | sed '\''s/.* //'\'' > $lib.exp;$CC -o $objdir/$soname$libobjs ${wl}-bE:$lib.exp ${wl}-bM:SRE ${wl}-bnoentry;$AR cru $lib $objdir/$soname' - hardcode_direct=yes - hardcode_minus_L=yes - ;; - - amigaos*) - archive_cmds='$rm $objdir/a2ixlibrary.data;$echo "#define NAME $libname" > $objdir/a2ixlibrary.data;$echo "#define LIBRARY_ID 1" >> $objdir/a2ixlibrary.data;$echo "#define VERSION $major" >> $objdir/a2ixlibrary.data;$echo "#define REVISION $revision" >> $objdir/a2ixlibrary.data;$AR cru $lib$libobjs;$RANLIB $lib;(cd $objdir && a2ixlibrary -32)' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - ;; - - # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor - # support. Future versions do this automatically, but an explicit c++rt0.o - # does not break anything, and helps significantly (at the cost of a little - # extra space). 
- freebsd2.2*) - archive_cmds='$LD -Bshareable -o $lib$libobjs /usr/lib/c++rt0.o' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. - freebsd2*) - archive_cmds='$LD -Bshareable -o $lib$libobjs' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - # FreeBSD 3, at last, uses gcc -shared to do shared libraries. - freebsd3*) - archive_cmds='$CC -shared -o $lib$libobjs' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - hpux9*) - archive_cmds='$rm $objdir/$soname;$LD -b +s +b $install_libdir -o $objdir/$soname$libobjs;mv $objdir/$soname $lib' - hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' - hardcode_direct=yes - hardcode_minus_L=yes - export_dynamic_flag_spec='${wl}-E' - ;; - - hpux10*) - archive_cmds='$LD -b +h $soname +s +b $install_libdir -o $lib$libobjs' - hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' - hardcode_direct=yes - hardcode_minus_L=yes - export_dynamic_flag_spec='${wl}-E' - ;; - - irix5* | irix6*) - archive_cmds='$LD -shared -o $lib -soname $soname -set_version $verstring$libobjs' - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - ;; - - netbsd*) - # Tested with NetBSD 1.2 ld - archive_cmds='$LD -Bshareable -o $lib$libobjs' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - openbsd*) - archive_cmds='$LD -Bshareable -o $lib$libobjs' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - os2*) - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - allow_undefined_flag=unsupported - archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $objdir/$libname.def;$echo "DESCRIPTION \"$libname\"" >> $objdir/$libname.def;$echo DATA >> $objdir/$libname.def;$echo " SINGLE NONSHARED" >> $objdir/$libname.def;$echo 
EXPORTS >> $objdir/$libname.def;emxexp$libobjs >> $objdir/$libname.def;$CC -Zdll -Zcrtdll -o $lib$libobjs $objdir/$libname.def' - old_archive_from_new_cmds='emximp -o $objdir/$libname.a $objdir/$libname.def' - ;; - - osf3* | osf4*) - allow_undefined_flag=' -expect_unresolved \*' - archive_cmds='$LD -shared${allow_undefined_flag} -o $lib -soname $soname -set_version $verstring$libobjs$deplibs' - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - hardcode_libdir_separator=: - ;; - - sco3.2v5*) - archive_cmds='$LD -G -o $lib$libobjs' - hardcode_direct=yes - ;; - - solaris2*) - no_undefined_flag=' -z text' - archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib$libobjs' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_shlibpath_var=no - - # Solaris 2 before 2.5 hardcodes -L paths. - case "$host_os" in - solaris2.[0-4]*) - hardcode_minus_L=yes - ;; - esac - ;; - - sunos4*) - if test "$with_gcc" = yes; then - archive_cmds='$CC -shared -o $lib$libobjs' - else - archive_cmds='$LD -assert pure-text -Bstatic -o $lib$libobjs' - fi - - if test "$with_gnu_ld" = yes; then - export_dynamic_flag_spec='${wl}-export-dynamic' - fi - hardcode_libdir_flag_spec='-L$libdir' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - uts4*) - archive_cmds='$LD -G -h $soname -o $lib$libobjs' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_direct=no - hardcode_minus_L=no - hardcode_shlibpath_var=no - ;; - - *) - ld_shlibs=no - can_build_shared=no - ;; - esac -fi -echo "$ac_t$ld_shlibs" 1>&6 - -if test -z "$NM"; then - echo $ac_n "checking for BSD-compatible nm... $ac_c" 1>&6 - case "$NM" in - /* | [A-Za-z]:\\*) ;; # Let the user override the test with a path. - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in /usr/ucb /usr/ccs/bin $PATH /bin; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/nm; then - # Check to see if the nm accepts a BSD-compat flag. 
- # Adding the `sed 1q' prevents false positives on HP-UX, which says: - # nm: unknown option "B" ignored - if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then - NM="$ac_dir/nm -B" - elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then - NM="$ac_dir/nm -p" - else - NM="$ac_dir/nm" - fi - break - fi - done - IFS="$ac_save_ifs" - test -z "$NM" && NM=nm - ;; - esac - echo "$ac_t$NM" 1>&6 -fi - -# Check for command to grab the raw symbol name followed by C symbol from nm. -echo $ac_n "checking command to parse $NM output... $ac_c" 1>&6 - -# These are sane defaults that work on at least a few old systems. -# [They come from Ultrix. What could be older than Ultrix?!! ;)] - -# Character class describing NM global symbol codes. -symcode='[BCDEGRSTU]' - -# Regexp to match symbols that can be accessed directly from C. -sympat='\([_A-Za-z][_A-Za-z0-9]*\)' - -# Transform the above into a raw symbol and a C symbol. -symxfrm='\1 \1' - -# Define system-specific variables. -case "$host_os" in -aix*) - symcode='[BCDTU]' - ;; -irix*) - # Cannot use undefined symbols on IRIX because inlined functions mess us up. - symcode='[BCDEGRST]' - ;; -solaris2*) - symcode='[BDTU]' - ;; -esac - -# If we're using GNU nm, then use its standard symbol codes. -if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then - symcode='[ABCDGISTUW]' -fi - -# Write the raw and C identifiers. -global_symbol_pipe="sed -n -e 's/^.* $symcode $sympat$/$symxfrm/p'" - -# Check to see that the pipe works correctly. -pipe_works=no -$rm conftest* -cat > conftest.c <&5 -if { (eval echo $progname:972: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; } && test -s conftest.o; then - # Now try to grab the symbols. - nlist=conftest.nm - if { echo "$progname:975: eval \"$NM conftest.o | $global_symbol_pipe > $nlist\"" >&5; eval "$NM conftest.o | $global_symbol_pipe > $nlist 2>&5"; } && test -s "$nlist"; then - - # Try sorting and uniquifying the output. 
- if sort "$nlist" | uniq > "$nlist"T; then - mv -f "$nlist"T "$nlist" - wcout=`wc "$nlist" 2>/dev/null` - count=`$echo "X$wcout" | $Xsed -e 's/^[ ]*\([0-9][0-9]*\).*$/\1/'` - (test "$count" -ge 0) 2>/dev/null || count=-1 - else - rm -f "$nlist"T - count=-1 - fi - - # Make sure that we snagged all the symbols we need. - if egrep ' nm_test_var$' "$nlist" >/dev/null; then - if egrep ' nm_test_func$' "$nlist" >/dev/null; then - cat < conftest.c -#ifdef __cplusplus -extern "C" { -#endif - -EOF - # Now generate the symbol file. - sed 's/^.* \(.*\)$/extern char \1;/' < "$nlist" >> conftest.c - - cat <> conftest.c -#if defined (__STDC__) && __STDC__ -# define __ptr_t void * -#else -# define __ptr_t char * -#endif - -/* The number of symbols in dld_preloaded_symbols, -1 if unsorted. */ -int dld_preloaded_symbol_count = $count; - -/* The mapping between symbol names and symbols. */ -struct { - char *name; - __ptr_t address; -} -dld_preloaded_symbols[] = -{ -EOF - sed 's/^\(.*\) \(.*\)$/ {"\1", (__ptr_t) \&\2},/' < "$nlist" >> conftest.c - cat <<\EOF >> conftest.c - {0, (__ptr_t) 0} -}; - -#ifdef __cplusplus -} -#endif -EOF - # Now try linking the two files. - mv conftest.o conftestm.o - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS='conftestm.o' - CFLAGS="$CFLAGS$no_builtin_flag" - if { (eval echo $progname:1033: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then - pipe_works=yes - else - echo "$progname: failed program was:" >&5 - cat conftest.c >&5 - fi - LIBS="$save_LIBS" - else - echo "cannot find nm_test_func in $nlist" >&5 - fi - else - echo "cannot find nm_test_var in $nlist" >&5 - fi - else - echo "cannot run $global_symbol_pipe" >&5 - fi -else - echo "$progname: failed program was:" >&5 - cat conftest.c >&5 -fi -$rm conftest* - -# Do not use the global_symbol_pipe unless it works. -echo "$ac_t$pipe_works" 1>&6 -test "$pipe_works" = yes || global_symbol_pipe= - -# Check hardcoding attributes. 
-echo $ac_n "checking how to hardcode library paths into programs... $ac_c" 1>&6 -hardcode_action= -if test -n "$hardcode_libdir_flag_spec" || \ - test -n "$runpath_var"; then - - # We can hardcode non-existant directories. - if test "$hardcode_direct" != no && \ - test "$hardcode_minus_L" != no && \ - test "$hardcode_shlibpath_var" != no; then - - # Linking always hardcodes the temporary library directory. - hardcode_action=relink - else - # We can link without hardcoding, and we can hardcode nonexisting dirs. - hardcode_action=immediate - fi -elif test "$hardcode_direct" != yes && \ - test "$hardcode_minus_L" != yes && \ - test "$hardcode_shlibpath_var" != yes; then - # We cannot hardcode anything. - hardcode_action=unsupported -else - # We can only hardcode existing directories. - hardcode_action=relink -fi -echo "$ac_t$hardcode_action" 1>&6 -test "$hardcode_action" = unsupported && can_build_shared=no - - -reload_flag= -reload_cmds='$LD$reload_flag -o $output$reload_objs' -echo $ac_n "checking for $LD option to reload object files... $ac_c" 1>&6 -# PORTME Some linker may need a different reload flag. -reload_flag='-r' -echo "$ac_t$reload_flag" -test -n "$reload_flag" && reload_flag=" $reload_flag" - -# PORTME Fill in your ld.so characteristics -library_names_spec= -libname_spec='lib$name' -soname_spec= -postinstall_cmds= -postuninstall_cmds= -finish_cmds= -finish_eval= -shlibpath_var= -version_type=none -dynamic_linker="$host_os ld.so" - -echo $ac_n "checking dynamic linker characteristics... $ac_c" 1>&6 -case "$host_os" in -aix3* | aix4*) - version_type=linux - library_names_spec='${libname}${release}.so.$versuffix $libname.a' - shlibpath_var=LIBPATH - - # AIX has no versioning support, so we append a major version to the name. - soname_spec='${libname}${release}.so.$major' - ;; - -amigaos*) - library_names_spec='$libname.ixlibrary $libname.a' - # Create ${libname}_ixlibrary.a entries in /sys/libs. 
- finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' - ;; - -freebsd2* | freebsd3*) - version_type=sunos - library_names_spec='${libname}${release}.so.$versuffix $libname.so' - finish_cmds='PATH="$PATH:/sbin" ldconfig -m $libdir' - shlibpath_var=LD_LIBRARY_PATH - ;; - -gnu*) - version_type=sunos - library_names_spec='${libname}${release}.so.$versuffix' - shlibpath_var=LD_LIBRARY_PATH - ;; - -hpux9* | hpux10*) - # Give a soname corresponding to the major version so that dld.sl refuses to - # link against other versions. - dynamic_linker="$host_os dld.sl" - version_type=sunos - shlibpath_var=SHLIB_PATH - library_names_spec='${libname}${release}.sl.$versuffix ${libname}${release}.sl.$major $libname.sl' - soname_spec='${libname}${release}.sl.$major' - # HP-UX runs *really* slowly unless shared libraries are mode 555. - postinstall_cmds='chmod 555 $lib' - ;; - -irix5* | irix6*) - version_type=osf - soname_spec='${libname}${release}.so' - library_names_spec='${libname}${release}.so.$versuffix $libname.so' - shlibpath_var=LD_LIBRARY_PATH - ;; - -# No shared lib support for Linux oldld, aout, or coff. -linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*) - dynamic_linker=no - ;; - -# This must be Linux ELF. -linux-gnu*) - version_type=linux - library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so' - soname_spec='${libname}${release}.so.$major' - finish_cmds='PATH="$PATH:/sbin" ldconfig -n $libdir' - shlibpath_var=LD_LIBRARY_PATH - - if test -f /lib/ld.so.1; then - dynamic_linker='GNU ld.so' - else - # Only the GNU ld.so supports shared libraries on MkLinux. 
- case "$host_cpu" in - powerpc*) dynamic_linker=no ;; - *) dynamic_linker='Linux ld.so' ;; - esac - fi - ;; - -netbsd* | openbsd*) - version_type=sunos - library_names_spec='${libname}${release}.so.$versuffix' - finish_cmds='PATH="$PATH:/sbin" ldconfig -m $libdir' - shlibpath_var=LD_LIBRARY_PATH - ;; - -os2*) - libname_spec='$name' - library_names_spec='$libname.dll $libname.a' - dynamic_linker='OS/2 ld.exe' - shlibpath_var=LIBPATH - ;; - -osf3* | osf4*) - version_type=osf - soname_spec='${libname}${release}.so' - library_names_spec='${libname}${release}.so.$versuffix $libname.so' - shlibpath_var=LD_LIBRARY_PATH - ;; - -sco3.2v5*) - version_type=osf - soname_spec='${libname}${release}.so.$major' - library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so' - shlibpath_var=LD_LIBRARY_PATH - ;; - -solaris2*) - version_type=linux - library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so' - soname_spec='${libname}${release}.so.$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -sunos4*) - version_type=sunos - library_names_spec='${libname}${release}.so.$versuffix' - finish_cmds='PATH="$PATH:/usr/etc" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - ;; - -sysv4.2uw2*) - version_type=linux - library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so' - soname_spec='${libname}${release}.so.$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -uts4*) - version_type=linux - library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so' - soname_spec='${libname}${release}.so.$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -*) - dynamic_linker=no - ;; -esac -echo "$ac_t$dynamic_linker" -test "$dynamic_linker" = no && can_build_shared=no - -# Report the final consequences. -echo "checking if libtool supports shared libraries... $can_build_shared" 1>&6 - -echo $ac_n "checking whether to build shared libraries... 
$ac_c" 1>&6 -test "$can_build_shared" = "no" && enable_shared=no - -# On AIX, shared libraries and static libraries use the same namespace, and -# are all built from PIC. -case "$host_os" in -aix*) - test "$enable_shared" = yes && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds;\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; -esac - -echo "$ac_t$enable_shared" 1>&6 - -# Make sure either enable_shared or enable_static is yes. -test "$enable_shared" = yes || enable_static=yes - -echo "checking whether to build static libraries... $enable_static" 1>&6 - -echo $ac_n "checking for objdir... $ac_c" 1>&6 -rm -f .libs 2>/dev/null -mkdir .libs 2>/dev/null -if test -d .libs; then - objdir=.libs -else - # MS-DOS does not allow filenames that begin with a dot. - objdir=_libs -fi -rmdir .libs 2>/dev/null -echo "$ac_t$objdir" 1>&6 - -# Copy echo and quote the copy, instead of the original, because it is -# used later. -ltecho="$echo" - -# Now quote all the things that may contain metacharacters. -for var in ltecho old_CC old_CFLAGS old_CPPFLAGS old_LD old_NM old_RANLIB \ - old_LN_S AR CC LD LN_S NM reload_flag reload_cmds wl pic_flag \ - link_static_flag no_builtin_flag export_dynamic_flag_spec \ - libname_spec library_names_spec soname_spec RANLIB \ - old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \ - old_postuninstall_cmds archive_cmds postinstall_cmds postuninstall_cmds \ - allow_undefined_flag no_undefined_flag \ - finish_cmds finish_eval global_symbol_pipe \ - hardcode_libdir_flag_spec hardcode_libdir_separator; do - - case "$var" in - reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \ - old_postinstall_cmds | old_postuninstall_cmds | archive_cmds | \ - postinstall_cmds | postuninstall_cmds | finish_cmds) - # Double-quote double-evaled strings. 
- eval "$var=\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\"\`" - ;; - *) - eval "$var=\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`" - ;; - esac -done - -ofile=libtool -trap "$rm $ofile; exit 1" 1 2 15 -echo creating $ofile -$rm $ofile -cat < $ofile -#! /bin/sh - -# libtool - Provide generalized library-building support services. -# Generated automatically by $PROGRAM - GNU $PACKAGE $VERSION -# NOTE: Changes made to this file will be lost: look at ltconfig or ltmain.sh. -# -# Copyright (C) 1996-1998 Free Software Foundation, Inc. -# Gordon Matzigkeit , 1996 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# This program was configured as follows, -# on host `(hostname || uname -n) 2>/dev/null | sed 1q`: -# -# CC="$old_CC" CFLAGS="$old_CFLAGS" CPPFLAGS="$old_CPPFLAGS" \\ -# LD="$old_LD" NM="$old_NM" RANLIB="$old_RANLIB" LN_S="$old_LN_S" \\ -# $0$ltconfig_args -# -# Compiler and other test output produced by $progname, useful for -# debugging $progname, is in ./config.log if it exists. 
- -# Sed that helps us avoid accidentally triggering echo(1) options like -n. -Xsed="sed -e s/^X//" - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -if test "\${CDPATH+set}" = set; then CDPATH=; export CDPATH; fi - -# An echo program that does not interpret backslashes. -echo="$ltecho" - -# The version of $progname that generated this script. -LTCONFIG_VERSION="$VERSION" - -# Shell to use when invoking shell scripts. -SHELL=${CONFIG_SHELL-/bin/sh} - -# Whether or not to build libtool libraries. -build_libtool_libs=$enable_shared - -# Whether or not to build old-style libraries. -build_old_libs=$enable_static - -# The host system. -host_alias="$host_alias" -host="$host" - -# The archiver. -AR="$AR" - -# The default C compiler. -CC="$CC" - -# The linker used to build libraries. -LD="$LD" - -# Whether we need hard or soft links. -LN_S="$LN_S" - -# A BSD-compatible nm program. -NM="$NM" - -# The name of the directory that contains temporary libtool files. -objdir="$objdir" - -# How to create reloadable object files. -reload_flag="$reload_flag" -reload_cmds="$reload_cmds" - -# How to pass a linker flag through the compiler. -wl="$wl" - -# Additional compiler flags for building library objects. -pic_flag="$pic_flag" - -# Compiler flag to prevent dynamic linking. -link_static_flag="$link_static_flag" - -# Compiler flag to turn off builtin functions. -no_builtin_flag="$no_builtin_flag" - -# Compiler flag to allow reflexive dlopens. -export_dynamic_flag_spec="$export_dynamic_flag_spec" - -# Library versioning type. -version_type=$version_type - -# Format of library name prefix. -libname_spec="$libname_spec" - -# List of archive names. First name is the real one, the rest are links. -# The last name is the one that the linker finds with -lNAME. -library_names_spec="$library_names_spec" - -# The coded name of the library, if different from the real name. 
-soname_spec="$soname_spec" - -# Commands used to build and install an old-style archive. -RANLIB="$RANLIB" -old_archive_cmds="$old_archive_cmds" -old_postinstall_cmds="$old_postinstall_cmds" -old_postuninstall_cmds="$old_postuninstall_cmds" - -# Create an old-style archive from a shared archive. -old_archive_from_new_cmds="$old_archive_from_new_cmds" - -# Commands used to build and install a shared archive. -archive_cmds="$archive_cmds" -postinstall_cmds="$postinstall_cmds" -postuninstall_cmds="$postuninstall_cmds" - -# Flag that allows shared libraries with undefined symbols to be built. -allow_undefined_flag="$allow_undefined_flag" - -# Flag that forces no undefined symbols. -no_undefined_flag="$no_undefined_flag" - -# Commands used to finish a libtool library installation in a directory. -finish_cmds="$finish_cmds" - -# Same as above, but a single script fragment to be evaled but not shown. -finish_eval="$finish_eval" - -# Take the output of nm and produce a listing of raw symbols and C names. -global_symbol_pipe="$global_symbol_pipe" - -# This is the shared library runtime path variable. -runpath_var=$runpath_var - -# This is the shared library path variable. -shlibpath_var=$shlibpath_var - -# How to hardcode a shared library path into an executable. -hardcode_action=$hardcode_action - -# Flag to hardcode \$libdir into a binary during linking. -# This must work even if \$libdir does not exist. -hardcode_libdir_flag_spec="$hardcode_libdir_flag_spec" - -# Whether we need a single -rpath flag with a separated argument. -hardcode_libdir_separator="$hardcode_libdir_separator" - -# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the -# resulting binary. -hardcode_direct=$hardcode_direct - -# Set to yes if using the -LDIR flag during linking hardcodes DIR into the -# resulting binary. -hardcode_minus_L=$hardcode_minus_L - -# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into -# the resulting binary. 
-hardcode_shlibpath_var=$hardcode_shlibpath_var - -EOF - -case "$host_os" in -aix3*) - cat <<\EOF >> $ofile -# AIX sometimes has problems with the GCC collect2 program. For some -# reason, if we set the COLLECT_NAMES environment variable, the problems -# vanish in a puff of smoke. -if test "${COLLECT_NAMES+set}" != set; then - COLLECT_NAMES= - export COLLECT_NAMES -fi - -EOF - ;; -esac - -# Append the ltmain.sh script. -cat "$ltmain" >> $ofile || (rm -f $ofile; exit 1) - -chmod +x $ofile -exit 0 - -# Local Variables: -# mode:shell-script -# sh-indentation:2 -# End: diff --git a/ltmain.sh b/ltmain.sh index e9350b3..0dbca1e 100644 --- a/ltmain.sh +++ b/ltmain.sh @@ -1,8 +1,9 @@ # ltmain.sh - Provide generalized library-building support services. -# NOTE: Changing this file will not affect anything until you rerun ltconfig. +# NOTE: Changing this file will not affect anything until you rerun configure. # -# Copyright (C) 1996-1998 Free Software Foundation, Inc. -# Gordon Matzigkeit , 1996 +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 +# Free Software Foundation, Inc. +# Originally by Gordon Matzigkeit , 1996 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,14 +24,42 @@ # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. +# Check that we have a working $echo. +if test "X$1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift +elif test "X$1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then + # Yippee, $echo works! + : +else + # Restart under the correct shell, and then maybe $echo will work. + exec $SHELL "$0" --no-reexec ${1+"$@"} +fi + +if test "X$1" = X--fallback-echo; then + # used as fallback echo + shift + cat <&2 - echo "Fatal configuration error. 
See the $PACKAGE docs for more information." 1>&2 - exit 1 +# We save the old values to restore during execute mode. +if test "${LC_ALL+set}" = set; then + save_LC_ALL="$LC_ALL"; LC_ALL=C; export LC_ALL fi +if test "${LANG+set}" = set; then + save_LANG="$LANG"; LANG=C; export LANG +fi + +# Make sure IFS has a sensible default +: ${IFS=" "} if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then echo "$modename: not configured to build any kind of library" 1>&2 @@ -72,6 +114,8 @@ run= show="$echo" show_help= execute_dlfiles= +lo2o="s/\\.lo\$/.${objext}/" +o2lo="s/\\.${objext}\$/.lo/" # Parse our command line options once, thoroughly. while test $# -gt 0 @@ -79,16 +123,16 @@ do arg="$1" shift - case "$arg" in + case $arg in -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;; *) optarg= ;; esac # If the previous option needs an argument, assign it. if test -n "$prev"; then - case "$prev" in + case $prev in execute_dlfiles) - eval "$prev=\"\$$prev \$arg\"" + execute_dlfiles="$execute_dlfiles $arg" ;; *) eval "$prev=\$arg" @@ -101,16 +145,26 @@ do fi # Have we seen a non-optional argument yet? - case "$arg" in + case $arg in --help) show_help=yes ;; --version) - echo "$PROGRAM (GNU $PACKAGE) $VERSION" + echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP" + exit 0 + ;; + + --config) + ${SED} -e '1,/^# ### BEGIN LIBTOOL CONFIG/d' -e '/^# ### END LIBTOOL CONFIG/,$d' $0 exit 0 ;; + --debug) + echo "$progname: enabling shell trace mode" + set -x + ;; + --dry-run | -n) run=: ;; @@ -135,6 +189,8 @@ do --mode) prevopt="--mode" prev=mode ;; --mode=*) mode="$optarg" ;; + --preserve-dup-deps) duplicate_deps="yes" ;; + --quiet | --silent) show=: ;; @@ -163,24 +219,29 @@ if test -n "$prevopt"; then exit 1 fi +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. +exec_cmd= + if test -z "$show_help"; then # Infer the operation mode. 
if test -z "$mode"; then - case "$nonopt" in - *cc | *++ | gcc* | *-gcc*) + case $nonopt in + *cc | *++ | gcc* | *-gcc* | xlc*) mode=link for arg do - case "$arg" in - -c) - mode=compile - break - ;; - esac + case $arg in + -c) + mode=compile + break + ;; + esac done ;; - *db | *dbx) + *db | *dbx | *strace | *truss) mode=execute ;; *install*|cp|mv) @@ -195,11 +256,11 @@ if test -z "$show_help"; then # Just use the default operation mode. if test -z "$mode"; then - if test -n "$nonopt"; then - $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 - else - $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 - fi + if test -n "$nonopt"; then + $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 + else + $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 + fi fi ;; esac @@ -217,31 +278,118 @@ if test -z "$show_help"; then help="Try \`$modename --help --mode=$mode' for more information." # These modes are in order of execution frequency so that they run quickly. - case "$mode" in + case $mode in # libtool compile mode compile) modename="$modename: compile" # Get the compilation command and the source file. base_compile= + prev= lastarg= srcfile="$nonopt" suppress_output= + user_target=no for arg do + case $prev in + "") ;; + xcompiler) + # Aesthetically quote the previous argument. + prev= + lastarg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + + case $arg in + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + + # Add the previous argument to base_compile. + if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + continue + ;; + esac + # Accept any command-line options. 
- case "$arg" in + case $arg in -o) - $echo "$modename: you cannot specify the output filename with \`-o'" 1>&2 - $echo "$help" 1>&2 - exit 1 + if test "$user_target" != "no"; then + $echo "$modename: you cannot specify \`-o' more than once" 1>&2 + exit 1 + fi + user_target=next ;; -static) - build_libtool_libs=no build_old_libs=yes continue ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"` + lastarg= + save_ifs="$IFS"; IFS=',' + for arg in $args; do + IFS="$save_ifs" + + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + lastarg="$lastarg $arg" + done + IFS="$save_ifs" + lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"` + + # Add the arguments to base_compile. + if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + continue + ;; + esac + + case $user_target in + next) + # The next one is the -o target name + user_target=yes + continue + ;; + yes) + # We got the output file + user_target=set + libobj="$arg" + continue + ;; esac # Accept the current argument as the source file. @@ -256,10 +404,10 @@ if test -z "$show_help"; then lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"` # Double-quote args containing other shell metacharacters. - # Many Bourne shells cannot handle close brackets correctly in scan - # sets, so we specify it separately. - case "$lastarg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. 
+ case $lastarg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") lastarg="\"$lastarg\"" ;; esac @@ -272,12 +420,23 @@ if test -z "$show_help"; then fi done - # Get the name of the library object. - libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` + case $user_target in + set) + ;; + no) + # Get the name of the library object. + libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` + ;; + *) + $echo "$modename: you must specify a target with \`-o'" 1>&2 + exit 1 + ;; + esac # Recognize several different file suffixes. - xform='[cCFSfms]' - case "$libobj" in + # If the user specifies -o file.o, it is replaced with file.lo + xform='[cCFSfmso]' + case $libobj in *.ada) xform=ada ;; *.adb) xform=adb ;; *.ads) xform=ads ;; @@ -292,10 +451,10 @@ if test -z "$show_help"; then libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"` - case "$libobj" in - *.lo) obj=`$echo "X$libobj" | $Xsed -e 's/\.lo$/.o/'` ;; + case $libobj in + *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;; *) - $echo "$modename: cannot determine name of library object from \`$srcfile'" 1>&2 + $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2 exit 1 ;; esac @@ -308,11 +467,65 @@ if test -z "$show_help"; then # Delete any leftover library objects. 
if test "$build_old_libs" = yes; then - $run $rm $obj $libobj - trap "$run $rm $obj $libobj; exit 1" 1 2 15 + removelist="$obj $libobj" + else + removelist="$libobj" + fi + + $run $rm $removelist + trap "$run $rm $removelist; exit 1" 1 2 15 + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2*) + pic_mode=default + ;; + esac + if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test "$compiler_c_o" = no; then + output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext} + lockfile="$output_obj.lock" + removelist="$removelist $output_obj $lockfile" + trap "$run $rm $removelist; exit 1" 1 2 15 else - $run $rm $libobj - trap "$run $rm $libobj; exit 1" 1 2 15 + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test "$need_locks" = yes; then + until $run ln "$0" "$lockfile" 2>/dev/null; do + $show "Waiting for $lockfile to be removed" + sleep 2 + done + elif test "$need_locks" = warn; then + if test -f "$lockfile"; then + echo "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + echo $srcfile > "$lockfile" + fi + + if test -n "$fix_srcfile_path"; then + eval srcfile=\"$fix_srcfile_path\" fi # Only build a PIC object if we are building libtool libraries. 
@@ -320,24 +533,121 @@ if test -z "$show_help"; then # Without this assignment, base_compile gets emptied. fbsd_hideous_sh_bug=$base_compile - # All platforms use -DPIC, to notify preprocessed assembler code. - $show "$base_compile$pic_flag -DPIC $srcfile" - if $run eval "$base_compile\$pic_flag -DPIC \$srcfile"; then : + if test "$pic_mode" != no; then + # All platforms use -DPIC, to notify preprocessed assembler code. + command="$base_compile $srcfile $pic_flag -DPIC" else - test -n "$obj" && $run $rm $obj - exit 1 + # Don't build PIC code + command="$base_compile $srcfile" fi + if test "$build_old_libs" = yes; then + lo_libobj="$libobj" + dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$dir" = "X$libobj"; then + dir="$objdir" + else + dir="$dir/$objdir" + fi + libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` - # If we have no pic_flag, then copy the object into place and finish. - if test -z "$pic_flag"; then - $show "$LN_S $obj $libobj" - $run $LN_S $obj $libobj - exit $? + if test -d "$dir"; then + $show "$rm $libobj" + $run $rm $libobj + else + $show "$mkdir $dir" + $run $mkdir $dir + status=$? + if test $status -ne 0 && test ! -d $dir; then + exit $status + fi + fi fi + if test "$compiler_o_lo" = yes; then + output_obj="$libobj" + command="$command -o $output_obj" + elif test "$compiler_c_o" = yes; then + output_obj="$obj" + command="$command -o $output_obj" + fi + + $run $rm "$output_obj" + $show "$command" + if $run eval "$command"; then : + else + test -n "$output_obj" && $run $rm $removelist + exit 1 + fi + + if test "$need_locks" = warn && + test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then + echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. 
If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + + # Just move the object if needed, then go on to compile the next one + if test x"$output_obj" != x"$libobj"; then + $show "$mv $output_obj $libobj" + if $run $mv $output_obj $libobj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # If we have no pic_flag, then copy the object into place and finish. + if (test -z "$pic_flag" || test "$pic_mode" != default) && + test "$build_old_libs" = yes; then + # Rename the .lo from within objdir to obj + if test -f $obj; then + $show $rm $obj + $run $rm $obj + fi + + $show "$mv $libobj $obj" + if $run $mv $libobj $obj; then : + else + error=$? + $run $rm $removelist + exit $error + fi - # Just move the object, then go on to compile the next one - $show "$mv $obj $libobj" - $run $mv $obj $libobj || exit 1 + xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$obj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"` + libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` + # Now arrange that obj and lo_libobj become the same file + $show "(cd $xdir && $LN_S $baseobj $libobj)" + if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then + # Unlock the critical section if it was locked + if test "$need_locks" != no; then + $run $rm "$lockfile" + fi + exit 0 + else + error=$? + $run $rm $removelist + exit $error + fi + fi # Allow error messages only from the first compilation. suppress_output=' >/dev/null 2>&1' @@ -345,64 +655,170 @@ if test -z "$show_help"; then # Only build a position-dependent object if we build old libraries. if test "$build_old_libs" = yes; then + if test "$pic_mode" != yes; then + # Don't build PIC code + command="$base_compile $srcfile" + else + # All platforms use -DPIC, to notify preprocessed assembler code. 
+ command="$base_compile $srcfile $pic_flag -DPIC" + fi + if test "$compiler_c_o" = yes; then + command="$command -o $obj" + output_obj="$obj" + fi + # Suppress compiler output if we already did a PIC compilation. - $show "$base_compile $srcfile$suppress_output" - if $run eval "$base_compile \$srcfile$suppress_output"; then : + command="$command$suppress_output" + $run $rm "$output_obj" + $show "$command" + if $run eval "$command"; then : + else + $run $rm $removelist + exit 1 + fi + + if test "$need_locks" = warn && + test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then + echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + + # Just move the object if needed + if test x"$output_obj" != x"$obj"; then + $show "$mv $output_obj $obj" + if $run $mv $output_obj $obj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # Create an invalid libtool object if no PIC, so that we do not + # accidentally link it into a program. + if test "$build_libtool_libs" != yes; then + $show "echo timestamp > $libobj" + $run eval "echo timestamp > \$libobj" || exit $? else - $run $rm $obj $libobj - exit 1 + # Move the .lo from within objdir + $show "$mv $libobj $lo_libobj" + if $run $mv $libobj $lo_libobj; then : + else + error=$? + $run $rm $removelist + exit $error + fi fi fi - # Create an invalid libtool object if no PIC, so that we do not - # accidentally link it into a program. - if test "$build_libtool_libs" != yes; then - $show "echo timestamp > $libobj" - $run eval "echo timestamp > \$libobj" || exit $? 
+ # Unlock the critical section if it was locked + if test "$need_locks" != no; then + $run $rm "$lockfile" fi exit 0 ;; # libtool link mode - link) + link | relink) modename="$modename: link" - CC="$nonopt" - allow_undefined=yes - compile_command="$CC" - finalize_command="$CC" + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # which system we are compiling for in order to pass an extra + # flag for every libtool invokation. + # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll which has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. + allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args="$nonopt" + compile_command="$nonopt" + finalize_command="$nonopt" + compile_rpath= + finalize_rpath= compile_shlibpath= finalize_shlibpath= + convenience= + old_convenience= deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + + avoid_version=no dlfiles= dlprefiles= + dlself=no export_dynamic=no - hardcode_libdirs= + export_symbols= + export_symbols_regex= + generated= libobjs= - link_against_libtool_libs= ltlibs= + module=no + no_install=no objs= + prefer_static_libs=no + preload=no prev= prevarg= release= rpath= + xrpath= perm_rpath= temp_rpath= + thread_safe=no vinfo= # We need to know -static, to get the right output filenames. 
for arg do - case "$arg" in + case $arg in -all-static | -static) - if test "X$arg" = "X-all-static" && test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then + if test "X$arg" = "X-all-static"; then + if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2 - fi - build_libtool_libs=no + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + else + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + fi + build_libtool_libs=no build_old_libs=yes - break - ;; + prefer_static_libs=yes + break + ;; esac done @@ -410,55 +826,141 @@ if test -z "$show_help"; then test -n "$old_archive_from_new_cmds" && build_old_libs=yes # Go through the arguments, transforming them on the way. - for arg - do + while test $# -gt 0; do + arg="$1" + shift + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test + ;; + *) qarg=$arg ;; + esac + libtool_args="$libtool_args $qarg" + # If the previous option needs an argument, assign it. if test -n "$prev"; then - case "$prev" in - output) - compile_command="$compile_command @OUTPUT@" - finalize_command="$finalize_command @OUTPUT@" - ;; - esac - - case "$prev" in - dlfiles|dlprefiles) - case "$arg" in - *.la | *.lo) ;; # We handle these cases below. - *) - dlprefiles="$dlprefiles $arg" - test "$prev" = dlfiles && dlfiles="$dlfiles $arg" - prev= - ;; - esac - ;; + case $prev in + output) + compile_command="$compile_command @OUTPUT@" + finalize_command="$finalize_command @OUTPUT@" + ;; + esac + + case $prev in + dlfiles|dlprefiles) + if test "$preload" = no; then + # Add the symbol object into the linking commands. 
+ compile_command="$compile_command @SYMFILE@" + finalize_command="$finalize_command @SYMFILE@" + preload=yes + fi + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test "$dlself" = no; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test "$prev" = dlprefiles; then + dlself=yes + elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test "$prev" = dlfiles; then + dlfiles="$dlfiles $arg" + else + dlprefiles="$dlprefiles $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols="$arg" + if test ! -f "$arg"; then + $echo "$modename: symbol file \`$arg' does not exist" + exit 1 + fi + prev= + continue + ;; + expsyms_regex) + export_symbols_regex="$arg" + prev= + continue + ;; release) release="-$arg" prev= continue ;; - rpath) - rpath="$rpath $arg" + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit 1 + ;; + esac + if test "$prev" = rpath; then + case "$rpath " in + *" $arg "*) ;; + *) rpath="$rpath $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) xrpath="$xrpath $arg" ;; + esac + fi prev= continue ;; - *) - eval "$prev=\"\$arg\"" - prev= - continue - ;; - esac - fi + xcompiler) + compiler_flags="$compiler_flags $qarg" + prev= + compile_command="$compile_command $qarg" + finalize_command="$finalize_command $qarg" + continue + ;; + xlinker) + linker_flags="$linker_flags $qarg" + compiler_flags="$compiler_flags $wl$qarg" + prev= + compile_command="$compile_command $wl$qarg" + finalize_command="$finalize_command $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n $prev prevarg="$arg" - case "$arg" in + case $arg in -all-static) if test -n "$link_static_flag"; then - compile_command="$compile_command 
$link_static_flag" + compile_command="$compile_command $link_static_flag" finalize_command="$finalize_command $link_static_flag" - fi - continue + fi + continue ;; -allow-undefined) @@ -467,100 +969,258 @@ if test -z "$show_help"; then continue ;; + -avoid-version) + avoid_version=yes + continue + ;; + -dlopen) - prev=dlfiles - continue - ;; + prev=dlfiles + continue + ;; -dlpreopen) - prev=dlprefiles - continue - ;; + prev=dlprefiles + continue + ;; -export-dynamic) - if test "$export_dynamic" != yes; then - export_dynamic=yes - if test -n "$export_dynamic_flag_spec"; then - eval arg=\"$export_dynamic_flag_spec\" - else - arg= - fi - - # Add the symbol object into the linking commands. - compile_command="$compile_command @SYMFILE@" - finalize_command="$finalize_command @SYMFILE@" - fi - ;; - - -L*) - dir=`$echo "X$arg" | $Xsed -e 's%^-L\(.*\)$%\1%'` - case "$dir" in - /* | [A-Za-z]:\\*) - # Add the corresponding hardcode_libdir_flag, if it is not identical. - ;; - *) - $echo "$modename: \`-L$dir' cannot specify a relative directory" 1>&2 - exit 1 - ;; - esac - deplibs="$deplibs $arg" - ;; - - -l*) deplibs="$deplibs $arg" ;; - - -no-undefined) - allow_undefined=no + export_dynamic=yes continue ;; - -o) prev=output ;; - - -release) - prev=release + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: more than one -exported-symbols argument is not allowed" + exit 1 + fi + if test "X$arg" = "X-export-symbols"; then + prev=expsyms + else + prev=expsyms_regex + fi continue ;; - -rpath) - prev=rpath - continue - ;; - - -static) - # If we have no pic_flag, then this is the same as -all-static. 
- if test -z "$pic_flag" && test -n "$link_static_flag"; then - compile_command="$compile_command $link_static_flag" - finalize_command="$finalize_command $link_static_flag" - fi + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | no/*-*-nonstopux*) + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + ;; + esac continue ;; - -version-info) - prev=vinfo - continue - ;; - - # Some other compiler flag. - -* | +*) - # Unknown arguments in both finalize_command and compile_command need - # to be aesthetically quoted because they are evaled later. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" + -L*) + dir=`$echo "X$arg" | $Xsed -e 's/^-L//'` + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2 + exit 1 + fi + dir="$absdir" ;; esac - ;; - - *.o | *.a) - # A standard object. - objs="$objs $arg" - ;; - - *.lo) - # A library object. 
- if test "$prev" = dlfiles; then - dlfiles="$dlfiles $arg" - if test "$build_libtool_libs" = yes; then + case "$deplibs " in + *" -L$dir "*) ;; + *) + deplibs="$deplibs -L$dir" + lib_search_path="$lib_search_path $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + case :$dllsearchpath: in + *":$dir:"*) ;; + *) dllsearchpath="$dllsearchpath:$dir";; + esac + ;; + esac + continue + ;; + + -l*) + if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then + case $host in + *-*-cygwin* | *-*-pw32* | *-*-beos*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-mingw* | *-*-os2*) + # These systems don't actually have a C library (as such) + test "X$arg" = "X-lc" && continue + ;; + *-*-openbsd* | *-*-freebsd*) + # Do not include libc due to us having libc/libc_r. + test "X$arg" = "X-lc" && continue + ;; + esac + elif test "X$arg" = "X-lc_r"; then + case $host in + *-*-openbsd* | *-*-freebsd*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + deplibs="$deplibs $arg" + continue + ;; + + -module) + module=yes + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + # The PATH hackery in wrapper scripts is required on Windows + # in order for the loader to find any dlls it needs. + $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2 + $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2 + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -o) prev=output ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + dir=`$echo "X$arg" | $Xsed -e 's/^-R//'` + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit 1 + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + continue + ;; + + -static) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. + continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'` + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case $flag in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Wl,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'` + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case $flag in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $wl$flag" + linker_flags="$linker_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + # Some other compiler flag. + -* | +*) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + ;; + + *.lo | *.$objext) + # A library or standard object. + if test "$prev" = dlfiles; then + # This file was specified with -dlopen. 
+ if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + dlfiles="$dlfiles $arg" prev= continue else @@ -571,230 +1231,59 @@ if test -z "$show_help"; then if test "$prev" = dlprefiles; then # Preload the old-style object. - dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e 's/\.lo$/\.o/'` + dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"` prev= + else + case $arg in + *.lo) libobjs="$libobjs $arg" ;; + *) objs="$objs $arg" ;; + esac fi - libobjs="$libobjs $arg" - ;; - - *.la) - # A libtool-controlled library. - - dlname= - libdir= - library_names= - old_library= - - # Check to see that this really is a libtool archive. - if (sed -e '2q' $arg | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then : - else - $echo "$modename: \`$arg' is not a valid libtool archive" 1>&2 - exit 1 - fi - - # If there is no directory component, then add one. - case "$arg" in - */* | *\\*) . $arg ;; - *) . ./$arg ;; - esac - - if test -z "$libdir"; then - $echo "$modename: \`$arg' contains no -rpath information" 1>&2 - exit 1 - fi - - # Get the name of the library we link against. - linklib= - for l in $old_library $library_names; do - linklib="$l" - done - - if test -z "$linklib"; then - $echo "$modename: cannot find name of link library for \`$arg'" 1>&2 - exit 1 - fi - - # Find the relevant object directory and library name. - name=`$echo "X$arg" | $Xsed -e 's%^.*/%%' -e 's/\.la$//' -e 's/^lib//'` - dir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` - if test "X$dir" = "X$arg"; then - dir="$objdir" - else - dir="$dir/$objdir" - fi - - # This library was specified with -dlopen. - if test "$prev" = dlfiles; then - dlfiles="$dlfiles $arg" - if test -z "$dlname"; then - # If there is no dlname, we need to preload. - prev=dlprefiles - else - # We should not create a dependency on this library, but we - # may need any libraries it requires. 
- compile_command="$compile_command$dependency_libs" - finalize_command="$finalize_command$dependency_libs" - prev= - continue - fi - fi - - # The library was specified with -dlpreopen. - if test "$prev" = dlprefiles; then - # Prefer using a static library (so that no silly _DYNAMIC symbols - # are required to link). - if test -n "$old_library"; then - dlprefiles="$dlprefiles $dir/$old_library" - else - dlprefiles="$dlprefiles $dir/$linklib" - fi - prev= - fi - - if test "$build_libtool_libs" = yes && test -n "$library_names"; then - link_against_libtool_libs="$link_against_libtool_libs $arg" - if test -n "$shlibpath_var"; then - # Make sure the rpath contains only unique directories. - case "$temp_rpath " in - *" $dir "*) ;; - *) temp_rpath="$temp_rpath $dir" ;; - esac - fi + ;; - # This is the magic to use -rpath. - if test -n "$hardcode_libdir_flag_spec"; then - if test -n "$hardcode_libdir_separator"; then - if test -z "$hardcode_libdirs"; then - # Put the magic libdir with the hardcode flag. - hardcode_libdirs="$libdir" - libdir="@HARDCODE_LIBDIRS@" - else - # Just accumulate the unique libdirs. - case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in - *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) - ;; - *) - hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" - ;; - esac - libdir= - fi - fi - - if test -n "$libdir"; then - eval flag=\"$hardcode_libdir_flag_spec\" - - compile_command="$compile_command $flag" - finalize_command="$finalize_command $flag" - fi - elif test -n "$runpath_var"; then - # Do the same for the permanent run path. 
- case "$perm_rpath " in - *" $libdir "*) ;; - *) perm_rpath="$perm_rpath $libdir" ;; - esac - fi - - - case "$hardcode_action" in - immediate) - if test "$hardcode_direct" = no; then - compile_command="$compile_command $dir/$linklib" - elif test "$hardcode_minus_L" = no; then - compile_command="$compile_command -L$dir -l$name" - elif test "$hardcode_shlibpath_var" = no; then - compile_shlibpath="$compile_shlibpath$dir:" - compile_command="$compile_command -l$name" - fi - ;; - - relink) - # We need an absolute path. - case "$dir" in - /* | [A-Za-z]:\\*) ;; - *) - absdir=`cd "$dir" && pwd` - if test -z "$absdir"; then - $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2 - exit 1 - fi - dir="$absdir" - ;; - esac - - if test "$hardcode_direct" = yes; then - compile_command="$compile_command $dir/$linklib" - elif test "$hardcode_minus_L" = yes; then - compile_command="$compile_command -L$dir -l$name" - elif test "$hardcode_shlibpath_var" = yes; then - compile_shlibpath="$compile_shlibpath$dir:" - compile_command="$compile_command -l$name" - fi - ;; - - *) - $echo "$modename: \`$hardcode_action' is an unknown hardcode action" 1>&2 - exit 1 - ;; - esac - - # Finalize command for both is simple: just hardcode it. - if test "$hardcode_direct" = yes; then - finalize_command="$finalize_command $libdir/$linklib" - elif test "$hardcode_minus_L" = yes; then - finalize_command="$finalize_command -L$libdir -l$name" - elif test "$hardcode_shlibpath_var" = yes; then - finalize_shlibpath="$finalize_shlibpath$libdir:" - finalize_command="$finalize_command -l$name" - else - # We cannot seem to hardcode it, guess we'll fake it. - finalize_command="$finalize_command -L$libdir -l$name" - fi - else - # Transform directly to old archives if we don't build new libraries. - if test -n "$pic_flag" && test -z "$old_library"; then - $echo "$modename: cannot find static library for \`$arg'" 1>&2 - exit 1 - fi + *.$libext) + # An archive. 
+ deplibs="$deplibs $arg" + old_deplibs="$old_deplibs $arg" + continue + ;; - # Here we assume that one of hardcode_direct or hardcode_minus_L - # is not unsupported. This is valid on all known static and - # shared platforms. - if test "$hardcode_direct" != unsupported; then - test -n "$old_library" && linklib="$old_library" - compile_command="$compile_command $dir/$linklib" - finalize_command="$finalize_command $dir/$linklib" - else - compile_command="$compile_command -L$dir -l$name" - finalize_command="$finalize_command -L$dir -l$name" - fi - fi + *.la) + # A libtool-controlled library. - # Add in any libraries that this one depends upon. - compile_command="$compile_command$dependency_libs" - finalize_command="$finalize_command$dependency_libs" + if test "$prev" = dlfiles; then + # This library was specified with -dlopen. + dlfiles="$dlfiles $arg" + prev= + elif test "$prev" = dlprefiles; then + # The library was specified with -dlpreopen. + dlprefiles="$dlprefiles $arg" + prev= + else + deplibs="$deplibs $arg" + fi continue - ;; + ;; # Some other compiler argument. *) # Unknown arguments in both finalize_command and compile_command need # to be aesthetically quoted because they are evaled later. arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac - ;; - esac + ;; + esac # arg # Now actually substitute the argument into the commands. 
if test -n "$arg"; then compile_command="$compile_command $arg" finalize_command="$finalize_command $arg" fi - done + done # argument parsing loop if test -n "$prev"; then $echo "$modename: the \`$prevarg' option requires an argument" 1>&2 @@ -802,722 +1291,2517 @@ if test -z "$show_help"; then exit 1 fi - if test -n "$vinfo" && test -n "$release"; then - $echo "$modename: you cannot specify both \`-version-info' and \`-release'" 1>&2 - $echo "$help" 1>&2 - exit 1 + if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + fi + + # calculate the name of the file, without its directory + outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'` + libobjs_save="$libobjs" + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$echo \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` + if test "X$output_objdir" = "X$output"; then + output_objdir="$objdir" + else + output_objdir="$output_objdir/$objdir" + fi + # Create the object directory. + if test ! -d $output_objdir; then + $show "$mkdir $output_objdir" + $run $mkdir $output_objdir + status=$? + if test $status -ne 0 && test ! -d $output_objdir; then + exit $status + fi fi - oldlib= - oldobjs= - case "$output" in + # Determine the type of output + case $output in "") $echo "$modename: you must specify an output file" 1>&2 $echo "$help" 1>&2 exit 1 ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. 
+ esac - */* | *\\*) - $echo "$modename: output file \`$output' must have no directory components" 1>&2 - exit 1 - ;; + specialdeplibs= + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. -la -lb -la) + for deplib in $deplibs; do + if test "X$duplicate_deps" = "Xyes" ; then + case "$libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + libs="$libs $deplib" + done + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + case $linkmode in + lib) + passes="conv link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + $echo "$modename: libraries can \`-dlopen' only libtool libraries: $file" 1>&2 + exit 1 + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=no + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + for pass in $passes; do + if test $linkmode = prog; then + # Determine which files to process + case $pass in + dlopen) + libs="$dlfiles" + save_deplibs="$deplibs" # Collect dlpreopened libraries + deplibs= + ;; + dlpreopen) libs="$dlprefiles" ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + for deplib in $libs; do + lib= + found=no + case $deplib in + -l*) + if test $linkmode = oldlib && test $linkmode = obj; then + $echo "$modename: warning: \`-l' is ignored for archives/objects: $deplib" 1>&2 + continue + fi + if test $pass = conv; then + deplibs="$deplib $deplibs" + continue + fi + name=`$echo "X$deplib" | $Xsed -e 's/^-l//'` + for searchdir in $newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path; do + # Search the libtool library + lib="$searchdir/lib${name}.la" + if test -f "$lib"; then + found=yes + break + fi + done 
+ if test "$found" != yes; then + # deplib doesn't seem to be a libtool library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test $linkmode = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + ;; # -l + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test $pass = conv && continue + newdependency_libs="$deplib $newdependency_libs" + newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` + ;; + prog) + if test $pass = conv; then + deplibs="$deplib $deplibs" + continue + fi + if test $pass = scan; then + deplibs="$deplib $deplibs" + newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + ;; + *) + $echo "$modename: warning: \`-L' is ignored for archives/objects: $deplib" 1>&2 + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test $pass = link; then + dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'` + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) lib="$deplib" ;; + *.$libext) + if test $pass = conv; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + if test "$deplibs_check_method" != pass_all; then + echo + echo "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not used here." 
+ else + echo + echo "*** Warning: Linking the shared library $output against the" + echo "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + fi + continue + ;; + prog) + if test $pass != link; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test $pass = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + newdlprefiles="$newdlprefiles $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + newdlfiles="$newdlfiles $deplib" + fi + continue + ;; + %DEPLIBS%) + alldeplibs=yes + continue + ;; + esac # case $deplib + if test $found = yes || test -f "$lib"; then : + else + $echo "$modename: cannot find the library \`$lib'" 1>&2 + exit 1 + fi + + # Check to see that this really is a libtool archive. + if (${SED} -e '2q' $lib | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + + ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` + test "X$ladir" = "X$lib" && ladir="." + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + # If the library was installed with an old release of libtool, + # it will not redefine variable installed. + installed=yes + + # Read the .la file + case $lib in + */* | *\\*) . $lib ;; + *) . 
./$lib ;; + esac + + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan" || + { test $linkmode = oldlib && test $linkmode = obj; }; then + # Add dl[pre]opened files of deplib + test -n "$dlopen" && dlfiles="$dlfiles $dlopen" + test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen" + fi + + if test $pass = conv; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit 1 + fi + # It is a libtool convenience library, so add in its objects. + convenience="$convenience $ladir/$objdir/$old_library" + old_convenience="$old_convenience $ladir/$objdir/$old_library" + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done + elif test $linkmode != prog && test $linkmode != lib; then + $echo "$modename: \`$lib' is not a convenience library" 1>&2 + exit 1 + fi + continue + fi # $pass = conv + + # Get the name of the library we link against. + linklib= + for l in $old_library $library_names; do + linklib="$l" + done + if test -z "$linklib"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit 1 + fi + + # This library was specified with -dlopen. + if test $pass = dlopen; then + if test -z "$libdir"; then + $echo "$modename: cannot -dlopen a convenience library: \`$lib'" 1>&2 + exit 1 + fi + if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. + dlprefiles="$dlprefiles $lib" + else + newdlfiles="$newdlfiles $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2 + $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 + abs_ladir="$ladir" + fi + ;; + esac + laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + + # Find the relevant object directory and library name. + if test "X$installed" = Xyes; then + if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + $echo "$modename: warning: library \`$lib' was moved." 1>&2 + dir="$ladir" + absdir="$abs_ladir" + libdir="$abs_ladir" + else + dir="$libdir" + absdir="$libdir" + fi + else + dir="$ladir/$objdir" + absdir="$abs_ladir/$objdir" + # Remove this search path later + notinst_path="$notinst_path $abs_ladir" + fi # $installed = yes + name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + + # This library was specified with -dlpreopen. + if test $pass = dlpreopen; then + if test -z "$libdir"; then + $echo "$modename: cannot -dlpreopen a convenience library: \`$lib'" 1>&2 + exit 1 + fi + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + newdlprefiles="$newdlprefiles $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + newdlprefiles="$newdlprefiles $dir/$dlname" + else + newdlprefiles="$newdlprefiles $dir/$linklib" + fi + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test $linkmode = lib; then + deplibs="$dir/$old_library $deplibs" + elif test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" + fi + continue + fi + + if test $linkmode = prog && test $pass != link; then + newlib_search_path="$newlib_search_path $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=no + if test "$link_all_deplibs" != no || test -z "$library_names" || + test "$build_libtool_libs" = no; then + linkalldeplibs=yes + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test + esac + # Need to link against all dependency_libs? + if test $linkalldeplibs = yes; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + link_static=no # Whether the deplib will be linked statically + if test -n "$library_names" && + { test "$prefer_static_libs" = no || test -z "$old_library"; }; then + # Link against this shared library + + if test "$linkmode,$pass" = "prog,link" || + { test $linkmode = lib && test $hardcode_into_libs = yes; }; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. 
+ case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) compile_rpath="$compile_rpath $absdir" + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" + esac + ;; + esac + if test $linkmode = prog; then + # We need to hardcode the library path + if test -n "$shlibpath_var"; then + # Make sure the rpath contains only unique directories. + case "$temp_rpath " in + *" $dir "*) ;; + *" $absdir "*) ;; + *) temp_rpath="$temp_rpath $dir" ;; + esac + fi + fi + fi # $linkmode,$pass = prog,link... + + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + + if test "$installed" = no; then + notinst_deplibs="$notinst_deplibs $lib" + need_relink=yes + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + realname="$2" + shift; shift + libname=`eval \\$echo \"$libname_spec\"` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname="$dlname" + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin*) + major=`expr $current - $age` + versuffix="-$major" + ;; + esac + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot="$soname" + soname=`echo $soroot | ${SED} -e 's/^.*\///'` + newlib="libimp-`echo $soname | ${SED} 's/^lib//;s/\.dll$//'`.a" + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + $show "extracting exported symbol list from \`$soname'" + save_ifs="$IFS"; IFS='~' + eval cmds=\"$extract_expsyms_cmds\" + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + $show "generating import library for \`$soname'" + save_ifs="$IFS"; IFS='~' + eval cmds=\"$old_archive_from_expsyms_cmds\" + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? 
+ done + IFS="$save_ifs" + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n $old_archive_from_expsyms_cmds + + if test $linkmode = prog || test "$mode" != relink; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test "$hardcode_direct" = no; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = no; then + case $host in + *-*-sunos*) add_shlibpath="$dir" ;; + esac + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = no; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + relink) + if test "$hardcode_direct" = yes; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test "$lib_linked" != yes; then + $echo "$modename: configuration error: unsupported hardcode properties" + exit 1 + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;; + esac + fi + if test $linkmode = prog; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test "$hardcode_direct" != yes && \ + test "$hardcode_minus_L" != yes && \ + test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + fi + fi + fi + + if test $linkmode = prog || test "$mode" = relink; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test "$hardcode_direct" = yes; then + add="$libdir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$libdir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + add="-l$name" + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir="-L$libdir" + add="-l$name" + fi + + if test $linkmode = prog; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test $linkmode = prog; then + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + + # Try to link the static library + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test "$hardcode_direct" != unsupported; then + test -n "$old_library" && linklib="$old_library" + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test "$build_libtool_libs" = yes; then + # Not a shared library + if test "$deplibs_check_method" != pass_all; then + # We're trying link a shared library against a static one + # but the system doesn't support it. + + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + echo "*** Warning: This system can not link to static lib archive $lib." 
+ echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test "$module" = yes; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + convenience="$convenience $dir/$old_library" + old_convenience="$old_convenience $dir/$old_library" + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? + + if test $linkmode = lib; then + if test -n "$dependency_libs" && + { test $hardcode_into_libs != yes || test $build_old_libs = yes || + test $link_static = yes; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'` + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) xrpath="$xrpath $temp_xrpath";; + esac;; + *) temp_deplibs="$temp_deplibs $libdir";; + esac + done + dependency_libs="$temp_deplibs" + fi + + newlib_search_path="$newlib_search_path $absdir" + # Link against this library + test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... 
and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done + + if test $link_all_deplibs != no; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + case $deplib in + -L*) path="$deplib" ;; + *.la) + dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$deplib" && dir="." + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 + absdir="$dir" + fi + ;; + esac + if grep "^installed=no" $deplib > /dev/null; then + path="-L$absdir/$objdir" + else + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit 1 + fi + if test "$absdir" != "$libdir"; then + $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2 + fi + path="-L$absdir" + fi + ;; + *) continue ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$deplibs $path" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test $pass = dlpreopen; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test $pass != dlopen; then + test $pass != scan && dependency_libs="$newdependency_libs" + if test $pass != conv; then + # Make sure lib_search_path contains only unique directories. 
+ lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) lib_search_path="$lib_search_path $dir" ;; + esac + done + newlib_search_path= + fi + + if test "$linkmode,$pass" != "prog,link"; then + vars="deplibs" + else + vars="compile_deplibs finalize_deplibs" + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + *) + case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + if test "$pass" = "conv" && + { test "$linkmode" = "lib" || test "$linkmode" = "prog"; }; then + libs="$deplibs" # reset libs + deplibs= + fi + done # for pass + if test $linkmode = prog; then + dlfiles="$newdlfiles" + dlprefiles="$newdlprefiles" + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2 + fi + + if test -n "$rpath"; then + $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for archives" 1>&2 + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for archives" 1>&2 + fi + + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2 + fi - 
*.a) # Now set the variables for building old libraries. build_libtool_libs=no - build_old_libs=yes - oldlib="$output" - $show "$rm $oldlib" - $run $rm $oldlib + oldlibs="$output" + objs="$objs$old_deplibs" ;; - *.la) + lib) # Make sure we only generate libraries of the form `libNAME.la'. - case "$output" in - lib*) ;; + case $outputname in + lib*) + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + eval libname=\"$libname_spec\" + ;; *) - $echo "$modename: libtool library \`$arg' must begin with \`lib'" 1>&2 - $echo "$help" 1>&2 - exit 1 + if test "$module" = no; then + $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + if test "$need_lib_prefix" != no; then + # Add the "lib" prefix for modules if required + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + eval libname=\"$libname_spec\" + else + libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + fi ;; esac - name=`$echo "X$output" | $Xsed -e 's/\.la$//' -e 's/^lib//'` - eval libname=\"$libname_spec\" + if test -n "$objs"; then + if test "$deplibs_check_method" != pass_all; then + $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1 + exit 1 + else + echo + echo "*** Warning: Linking the shared library $output against the non-libtool" + echo "*** objects $objs is not portable!" + libobjs="$libobjs $objs" + fi + fi + + if test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2 + fi + + set dummy $rpath + if test $# -gt 2; then + $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 + fi + install_libdir="$2" + + oldlibs= + if test -z "$rpath"; then + if test "$build_libtool_libs" = yes; then + # Building a libtool convenience library. 
+ libext=al + oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2 + fi + else + + # Parse the version information argument. + save_ifs="$IFS"; IFS=':' + set dummy $vinfo 0 0 0 + IFS="$save_ifs" + + if test -n "$8"; then + $echo "$modename: too many parameters to \`-version-info'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + current="$2" + revision="$3" + age="$4" + + # Check that each of the things are valid numbers. + case $current in + 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; + *) + $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + case $revision in + 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; + *) + $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + case $age in + 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; + *) + $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + if test $age -gt $current; then + $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + major=.`expr $current - $age` + versuffix="$major.$age.$revision" + # Darwin ld doesn't like 0 for these options... 
+ minor_current=`expr $current + 1` + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + + freebsd-aout) + major=".$current" + versuffix=".$current.$revision"; + ;; + + freebsd-elf) + major=".$current" + versuffix=".$current"; + ;; + + irix | nonstopux) + major=`expr $current - $age + 1` + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring="$verstring_prefix$major.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test $loop != 0; do + iface=`expr $revision - $loop` + loop=`expr $loop - 1` + verstring="$verstring_prefix$major.$iface:$verstring" + done + + # Before this point, $major must not contain `.'. + major=.$major + versuffix="$major.$revision" + ;; + + linux) + major=.`expr $current - $age` + versuffix="$major.$age.$revision" + ;; + + osf) + major=.`expr $current - $age` + versuffix=".$current.$age.$revision" + verstring="$current.$age.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$age + while test $loop != 0; do + iface=`expr $current - $loop` + loop=`expr $loop - 1` + verstring="$verstring:${iface}.0" + done + + # Make executables depend on our current version. + verstring="$verstring:${current}.0" + ;; + + sunos) + major=".$current" + versuffix=".$current.$revision" + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 filesystems. + major=`expr $current - $age` + versuffix="-$major" + ;; + + *) + $echo "$modename: unknown library version type \`$version_type'" 1>&2 + echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit 1 + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. 
+ if test -z "$vinfo" && test -n "$release"; then + major= + verstring="0.0" + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring="" + ;; + *) + verstring="0.0" + ;; + esac + if test "$need_version" = no; then + versuffix= + else + versuffix=".0.0" + fi + fi + + # Remove version info from name if versioning should be avoided + if test "$avoid_version" = yes && test "$need_version" = no; then + major= + versuffix= + verstring="" + fi + + # Check to see if the archive will have undefined symbols. + if test "$allow_undefined" = yes; then + if test "$allow_undefined_flag" = unsupported; then + $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 + build_libtool_libs=no + build_old_libs=yes + fi + else + # Don't allow undefined symbols. + allow_undefined_flag="$no_undefined_flag" + fi + fi + + if test "$mode" != relink; then + # Remove our outputs. + $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*" + $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.* + fi + + # Now set the variables for building old libraries. + if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then + oldlibs="$oldlibs $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + for path in $notinst_path; do + lib_search_path=`echo "$lib_search_path " | ${SED} -e 's% $path % %g'` + deplibs=`echo "$deplibs " | ${SED} -e 's% -L$path % %g'` + dependency_libs=`echo "$dependency_libs " | ${SED} -e 's% -L$path % %g'` + done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + temp_xrpath="$temp_xrpath -R$libdir" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" ;; + esac + done + if test $hardcode_into_libs != yes || test $build_old_libs = yes; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles="$dlfiles" + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) dlfiles="$dlfiles $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles="$dlprefiles" + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) dlprefiles="$dlprefiles $lib" ;; + esac + done + + if test "$build_libtool_libs" = yes; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + deplibs="$deplibs -framework System" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd*) + # Do not include libc due to us having libc/libc_r. + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test $build_libtool_need_lc = "yes"; then + deplibs="$deplibs -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. + name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release="" + versuffix="" + major="" + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. 
+ # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behaviour. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $rm conftest.c + cat > conftest.c </dev/null` + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null \ + | grep " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? + potlib="$potent_lib" + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; + *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \ + | ${SED} 10q \ + | egrep "$file_magic_regex" > /dev/null; then + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + break 2 + fi + done + done + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + echo "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + echo "*** with $libname but no candidates were found. 
(...for file magic test)" + else + echo "*** with $libname and none of the candidates passed a file format test" + echo "*** using a file magic. Last file checked: $potlib" + fi + fi + else + # Add a -L argument. + newdeplibs="$newdeplibs $a_deplib" + fi + done # Gone through all deplibs. + ;; + match_pattern*) + set dummy $deplibs_check_method + match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` + for a_deplib in $deplibs; do + name="`expr $a_deplib : '-l\(.*\)'`" + # If $name is empty we are operating on a -L argument. + if test -n "$name" && test "$name" != "0"; then + libname=`eval \\$echo \"$libname_spec\"` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib="$potent_lib" # see symlink-check below in file_magic test + if eval echo \"$potent_lib\" 2>/dev/null \ + | ${SED} 10q \ + | egrep "$match_pattern_regex" > /dev/null; then + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + break 2 + fi + done + done + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + echo "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + echo "*** with $libname but no candidates were found. (...for regex pattern test)" + else + echo "*** with $libname and none of the candidates passed a file format test" + echo "*** using a regex pattern. Last file checked: $potlib" + fi + fi + else + # Add a -L argument. + newdeplibs="$newdeplibs $a_deplib" + fi + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs="" + if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \ + -e 's/ -[LR][^ ]*//g' -e 's/[ ]//g' | + grep . >/dev/null; then + echo + if test "X$deplibs_check_method" = "Xnone"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + fi + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + newdeplibs=`$echo "X $newdeplibs" | $Xsed -e 's/ -lc / -framework System /'` + ;; + esac + + if test "$droppeddeps" = yes; then + if test "$module" = yes; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + echo "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." 
+ + if test $allow_undefined = no; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi # All the library-specific variables (install_libdir is set above). library_names= old_library= dlname= - current=0 - revision=0 - age=0 - - if test -n "$objs"; then - $echo "$modename: cannot build libtool library \`$output' from non-libtool objects:$objs" 2>&1 - exit 1 - fi - # How the heck are we supposed to write a wrapper for a shared library? - if test -n "$link_against_libtool_libs"; then - $echo "$modename: libtool library \`$output' may not depend on uninstalled libraries:$link_against_libtool_libs" 1>&2 - exit 1 - fi - - if test -n "$dlfiles$dlprefiles"; then - $echo "$modename: warning: \`-dlopen' is ignored while creating libtool libraries" 1>&2 - # Nullify the symbol file. - compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` - fi + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then + if test $hardcode_into_libs = yes; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath="$finalize_rpath" + test "$mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + dep_rpath="$dep_rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval dep_rpath=\"$hardcode_libdir_flag_spec\" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi - if test -z "$rpath"; then - $echo "$modename: you must specify an installation directory with \`-rpath'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi + shlibpath="$finalize_shlibpath" + test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath" + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi - set dummy $rpath - if test $# -gt 2; then - $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 - fi - install_libdir="$2" + # Get the real and link names of the library. + eval library_names=\"$library_names_spec\" + set dummy $library_names + realname="$2" + shift; shift - # Parse the version information argument. 
- IFS="${IFS= }"; save_ifs="$IFS"; IFS=':' - set dummy $vinfo - IFS="$save_ifs" + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + test -z "$dlname" && dlname=$soname - if test -n "$5"; then - $echo "$modename: too many parameters to \`-version-info'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi + lib="$output_objdir/$realname" + for link + do + linknames="$linknames $link" + done - test -n "$2" && current="$2" - test -n "$3" && revision="$3" - test -n "$4" && age="$4" + # Ensure that we have .o objects for linkers which dislike .lo + # (e.g. aix) in case we are running --disable-static + for obj in $libobjs; do + xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$obj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` + oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` + if test ! -f $xdir/$oldobj; then + $show "(cd $xdir && ${LN_S} $baseobj $oldobj)" + $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $? + fi + done - # Check that each of the things are valid numbers. - case "$current" in - 0 | [1-9] | [1-9][0-9]*) ;; - *) - $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then + $show "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $run $rm $export_symbols + eval cmds=\"$export_symbols_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? 
+ done + IFS="$save_ifs" + if test -n "$export_symbols_regex"; then + $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\"" + $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + $show "$mv \"${export_symbols}T\" \"$export_symbols\"" + $run eval '$mv "${export_symbols}T" "$export_symbols"' + fi + fi + fi - case "$revision" in - 0 | [1-9] | [1-9][0-9]*) ;; - *) - $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"' + fi - case "$age" in - 0 | [1-9] | [1-9][0-9]*) ;; - *) - $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${outputname}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + for xlib in $convenience; do + # Extract the objects. + case $xlib in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? + if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? 
+ + libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` + done + fi + fi - if test $age -gt $current; then - $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - fi - - # Calculate the version variables. - version_vars="version_type current age revision" - case "$version_type" in - none) ;; - - linux) - version_vars="$version_vars major versuffix" - major=`expr $current - $age` - versuffix="$major.$age.$revision" - ;; - - osf) - version_vars="$version_vars versuffix verstring" - major=`expr $current - $age` - versuffix="$current.$age.$revision" - verstring="$versuffix" - - # Add in all the interfaces that we are compatible with. - loop=$age - while test $loop != 0; do - iface=`expr $current - $loop` - loop=`expr $loop - 1` - verstring="$verstring:${iface}.0" - done - - # Make executables depend on our current version. - verstring="$verstring:${current}.0" - ;; - - sunos) - version_vars="$version_vars major versuffix" - major="$current" - versuffix="$current.$revision" - ;; + if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + linker_flags="$linker_flags $flag" + fi - *) - $echo "$modename: unknown library version type \`$version_type'" 1>&2 - echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 - exit 1 - ;; - esac + # Make a backup of the uninstalled library when relinking + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $? + fi - # Create the output directory, or remove our outputs if we need to. - if test -d $objdir; then - $show "$rm $objdir/$output $objdir/$libname.* $objdir/${libname}${release}.*" - $run $rm $objdir/$output $objdir/$libname.* $objdir/${libname}${release}.* - else - $show "$mkdir $objdir" - $run $mkdir $objdir - status=$? 
- if test $status -eq 0 || test -d $objdir; then : + # Do each of the archive commands. + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval cmds=\"$archive_expsym_cmds\" else - exit $status + save_deplibs="$deplibs" + for conv in $convenience; do + tmp_deplibs= + for test_deplib in $deplibs; do + if test "$test_deplib" != "$conv"; then + tmp_deplibs="$tmp_deplibs $test_deplib" + fi + done + deplibs="$tmp_deplibs" + done + eval cmds=\"$archive_cmds\" + deplibs="$save_deplibs" fi - fi - - # Check to see if the archive will have undefined symbols. - if test "$allow_undefined" = yes; then - if test "$allow_undefined_flag" = unsupported; then - $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 - build_libtool_libs=no - build_old_libs=yes - fi - else - # Don't allow undefined symbols. - allow_undefined_flag="$no_undefined_flag" - fi - - # Add libc to deplibs on all systems. - dependency_libs="$deplibs" - deplibs="$deplibs -lc" - - if test "$build_libtool_libs" = yes; then - # Get the real and link names of the library. - eval library_names=\"$library_names_spec\" - set dummy $library_names - realname="$2" - shift; shift - - if test -n "$soname_spec"; then - eval soname=\"$soname_spec\" - else - soname="$realname" - fi - - lib="$objdir/$realname" - for link - do - linknames="$linknames $link" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? done + IFS="$save_ifs" - # Use standard objects if they are PIC. - test -z "$pic_flag" && libobjs=`$echo "X$libobjs " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//g'` - - # Do each of the archive commands. - eval cmds=\"$archive_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? 
- done - IFS="$save_ifs" + # Restore the uninstalled library and exit + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $? + exit 0 + fi - # Create links to the real library. - for linkname in $linknames; do - $show "(cd $objdir && $LN_S $realname $linkname)" - $run eval '(cd $objdir && $LN_S $realname $linkname)' || exit $? - done + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)" + $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $? + fi + done - # If -export-dynamic was specified, set the dlname. - if test "$export_dynamic" = yes; then - # On all known operating systems, these are identical. - dlname="$soname" - fi + # If -module or -export-dynamic was specified, set the dlname. + if test "$module" = yes || test "$export_dynamic" = yes; then + # On all known operating systems, these are identical. + dlname="$soname" + fi fi - - # Now set the variables for building old libraries. - oldlib="$objdir/$libname.a" ;; - *.lo | *.o) - if test -n "$link_against_libtool_libs"; then - $echo "$modename: error: cannot link libtool libraries into reloadable objects" 1>&2 - exit 1 - fi - + obj) if test -n "$deplibs"; then - $echo "$modename: warning: \`-l' and \`-L' are ignored while creating objects" 1>&2 + $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2 fi - if test -n "$dlfiles$dlprefiles"; then - $echo "$modename: warning: \`-dlopen' is ignored while creating objects" 1>&2 - # Nullify the symbol file. 
- compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2 fi if test -n "$rpath"; then - $echo "$modename: warning: \`-rpath' is ignored while creating objects" 1>&2 + $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for objects" 1>&2 fi if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored while creating objects" 1>&2 + $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2 fi if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored while creating objects" 1>&2 + $echo "$modename: warning: \`-release' is ignored for objects" 1>&2 fi - case "$output" in + case $output in *.lo) - if test -n "$objs"; then - $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 - exit 1 - fi - libobj="$output" - obj=`$echo "X$output" | $Xsed -e 's/\.lo$/.o/'` - ;; + if test -n "$objs$old_deplibs"; then + $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 + exit 1 + fi + libobj="$output" + obj=`$echo "X$output" | $Xsed -e "$lo2o"` + ;; *) - libobj= - obj="$output" - ;; + libobj= + obj="$output" + ;; esac # Delete the old objects. $run $rm $obj $libobj + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. 
+ reload_conv_objs= + gentop= + # reload_cmds runs $LD directly, so let us get rid of + # -Wl from whole_archive_flag_spec + wl= + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${obj}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + for xlib in $convenience; do + # Extract the objects. + case $xlib in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? + if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + reload_conv_objs="$reload_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` + done + fi + fi + # Create the old-style object. - reload_objs="$objs"`$echo "X$libobjs " | $Xsed -e 's/[^ ]*\.a //g' -e 's/\.lo /.o /g' -e 's/ $//g'` + reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test output="$obj" eval cmds=\"$reload_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' + save_ifs="$IFS"; IFS='~' for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? done IFS="$save_ifs" # Exit if we aren't doing a library object file. 
- test -z "$libobj" && exit 0 + if test -z "$libobj"; then + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + exit 0 + fi if test "$build_libtool_libs" != yes; then - # Create an invalid libtool object if no PIC, so that we don't - # accidentally link it into a program. - $show "echo timestamp > $libobj" - $run eval "echo timestamp > $libobj" || exit $? - exit 0 - fi - - if test -n "$pic_flag"; then - # Only do commands if we really have different PIC objects. - reload_objs="$libobjs" - output="$libobj" - eval cmds=\"$reload_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + $show "echo timestamp > $libobj" + $run eval "echo timestamp > $libobj" || exit $? + exit 0 + fi + + if test -n "$pic_flag" || test "$pic_mode" != default; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output="$libobj" + eval cmds=\"$reload_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" else - # Just create a symlink. - $show "$LN_S $obj $libobj" - $run $LN_S $obj $libobj || exit 1 + # Just create a symlink. + $show $rm $libobj + $run $rm $libobj + xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$libobj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` + oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` + $show "(cd $xdir && $LN_S $oldobj $baseobj)" + $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $? 
+ fi + + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop fi exit 0 ;; - *) + prog) + case $host in + *cygwin*) output=`echo $output | ${SED} -e 's,.exe$,,;s,$,.exe,'` ;; + esac if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored while linking programs" 1>&2 + $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2 fi if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored while creating objects" 1>&2 + $echo "$modename: warning: \`-release' is ignored for programs" 1>&2 fi - if test -n "$rpath"; then + if test "$preload" = yes; then + if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown && + test "$dlopen_self_static" = unknown; then + $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support." + fi + fi + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$echo "X $compile_deplibs" | $Xsed -e 's/ -lc / -framework System /'` + finalize_deplibs=`$echo "X $finalize_deplibs" | $Xsed -e 's/ -lc / -framework System /'` + case $host in + *darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + compile_command="$compile_command ${wl}-bind_at_load" + finalize_command="$finalize_command ${wl}-bind_at_load" + ;; + esac + ;; + esac + + compile_command="$compile_command $compile_deplibs" + finalize_command="$finalize_command $finalize_deplibs" + + if test -n "$rpath$xrpath"; then # If the user specified any rpath flags, then add them. - for libdir in $rpath; do - if test -n "$hardcode_libdir_flag_spec"; then - if test -n "$hardcode_libdir_separator"; then - if test -z "$hardcode_libdirs"; then - # Put the magic libdir with the hardcode flag. - hardcode_libdirs="$libdir" - libdir="@HARDCODE_LIBDIRS@" - else - # Just accumulate the unique libdirs. 
- case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in - *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) - ;; - *) - hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" - ;; - esac - libdir= - fi - fi - - if test -n "$libdir"; then - eval flag=\"$hardcode_libdir_flag_spec\" - - compile_command="$compile_command $flag" - finalize_command="$finalize_command $flag" - fi - elif test -n "$runpath_var"; then - case "$perm_rpath " in - *" $libdir "*) ;; - *) perm_rpath="$perm_rpath $libdir" ;; - esac - fi + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" ;; + esac done fi - # Substitute the hardcoded libdirs into the compile commands. - if test -n "$hardcode_libdir_separator"; then - compile_command=`$echo "X$compile_command" | $Xsed -e "s%@HARDCODE_LIBDIRS@%$hardcode_libdirs%g"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@HARDCODE_LIBDIRS@%$hardcode_libdirs%g"` + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + case :$dllsearchpath: in + *":$libdir:"*) ;; + *) dllsearchpath="$dllsearchpath:$libdir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" fi - - if test -n "$libobjs" && test "$build_old_libs" = yes; then - # Transform all the library objects into standard objects. - compile_command=`$echo "X$compile_command " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'` - finalize_command=`$echo "X$finalize_command " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'` + compile_rpath="$rpath" + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" fi + finalize_rpath="$rpath" - if test "$export_dynamic" = yes && test -n "$NM" && test -n "$global_symbol_pipe"; then - dlsyms="${output}S.c" - else - dlsyms= + if test -n "$libobjs" && test "$build_old_libs" = yes; then + # Transform all the library objects into standard objects. + compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` fi - if test -n "$dlsyms"; then - # Add our own program objects to the preloaded list. - dlprefiles=`$echo "X$objs$dlprefiles " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'` - - # Discover the nlist of each of the dlfiles. - nlist="$objdir/${output}.nm" - - if test -d $objdir; then - $show "$rm $nlist ${nlist}T" - $run $rm "$nlist" "${nlist}T" + dlsyms= + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + dlsyms="${outputname}S.c" else - $show "$mkdir $objdir" - $run $mkdir $objdir - status=$? 
- if test $status -eq 0 || test -d $objdir; then : - else - exit $status - fi + $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2 fi + fi - for arg in $dlprefiles; do - $show "extracting global C symbols from \`$arg'" - $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" - done + if test -n "$dlsyms"; then + case $dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist="$output_objdir/${outputname}.nm" - # Parse the name list into a source file. - $show "creating $objdir/$dlsyms" - if test -z "$run"; then - # Make sure we at least have an empty file. - test -f "$nlist" || : > "$nlist" + $show "$rm $nlist ${nlist}S ${nlist}T" + $run $rm "$nlist" "${nlist}S" "${nlist}T" - # Try sorting and uniquifying the output. - if sort "$nlist" | uniq > "$nlist"T; then - mv -f "$nlist"T "$nlist" - wcout=`wc "$nlist" 2>/dev/null` - count=`echo "X$wcout" | $Xsed -e 's/^[ ]*\([0-9][0-9]*\).*$/\1/'` - (test "$count" -ge 0) 2>/dev/null || count=-1 - else - $rm "$nlist"T - count=-1 - fi + # Parse the name list into a source file. + $show "creating $output_objdir/$dlsyms" - case "$dlsyms" in - "") ;; - *.c) - $echo > "$objdir/$dlsyms" "\ -/* $dlsyms - symbol resolution table for \`$output' dlsym emulation. */ -/* Generated by $PROGRAM - GNU $PACKAGE $VERSION */ + test -z "$run" && $echo > "$output_objdir/$dlsyms" "\ +/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */ +/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */ #ifdef __cplusplus extern \"C\" { #endif /* Prevent the only kind of declaration conflicts we can make. */ -#define dld_preloaded_symbol_count some_other_symbol -#define dld_preloaded_symbols some_other_symbol +#define lt_preloaded_symbols some_other_symbol /* External symbol declarations for the compiler. 
*/\ " - if test -f "$nlist"; then - sed -e 's/^.* \(.*\)$/extern char \1;/' < "$nlist" >> "$objdir/$dlsyms" + if test "$dlself" = yes; then + $show "generating symbol list for \`$output'" + + test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. + progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + for arg in $progfiles; do + $show "extracting global C symbols from \`$arg'" + $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + $run eval '$mv "$nlist"T "$nlist"' + fi + + if test -n "$export_symbols_regex"; then + $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T' + $run eval '$mv "$nlist"T "$nlist"' + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols="$output_objdir/$output.exp" + $run $rm $export_symbols + $run eval "${SED} -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + else + $run eval "${SED} -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"' + $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T' + $run eval 'mv "$nlist"T "$nlist"' + fi + fi + + for arg in $dlprefiles; do + $show "extracting global C symbols from \`$arg'" + name=`echo "$arg" | ${SED} -e 's%^.*/%%'` + $run eval 'echo ": $name " >> "$nlist"' + $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" + done + + if test -z "$run"; then + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $mv "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. 
+ if grep -v "^: " < "$nlist" | + if sort -k 3 /dev/null 2>&1; then + sort -k 3 + else + sort +2 + fi | + uniq > "$nlist"S; then + : else - echo '/* NONE */' >> "$objdir/$dlsyms" + grep -v "^: " < "$nlist" > "$nlist"S fi - $echo >> "$objdir/$dlsyms" "\ + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$dlsyms" + fi -#undef dld_preloaded_symbol_count -#undef dld_preloaded_symbols + $echo >> "$output_objdir/$dlsyms" "\ + +#undef lt_preloaded_symbols #if defined (__STDC__) && __STDC__ -# define __ptr_t void * +# define lt_ptr void * #else -# define __ptr_t char * +# define lt_ptr char * +# define const #endif -/* The number of symbols in dld_preloaded_symbols, -1 if unsorted. */ -int dld_preloaded_symbol_count = $count; - /* The mapping between symbol names and symbols. */ -struct { - char *name; - __ptr_t address; +const struct { + const char *name; + lt_ptr address; } -dld_preloaded_symbols[] = +lt_preloaded_symbols[] = {\ " - if test -f "$nlist"; then - sed 's/^\(.*\) \(.*\)$/ {"\1", (__ptr_t) \&\2},/' < "$nlist" >> "$objdir/$dlsyms" - fi + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$dlsyms" - $echo >> "$objdir/$dlsyms" "\ - {0, (__ptr_t) 0} + $echo >> "$output_objdir/$dlsyms" "\ + {0, (lt_ptr) 0} }; +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_preloaded_symbols; +} +#endif + #ifdef __cplusplus } #endif\ " - ;; + fi - *) - $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2 - exit 1 - ;; + pic_flag_for_symtable= + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. 
+ *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";; + esac;; + *-*-hpux*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag -DPIC";; + esac esac - fi - - # Now compile the dynamic symbol file. - $show "(cd $objdir && $CC -c$no_builtin_flag \"$dlsyms\")" - $run eval '(cd $objdir && $CC -c$no_builtin_flag "$dlsyms")' || exit $? - # Transform the symbol file into the correct name. - compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$objdir/${output}S.o%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$objdir/${output}S.o%"` - elif test "$export_dynamic" != yes; then - test -n "$dlfiles$dlprefiles" && $echo "$modename: warning: \`-dlopen' and \`-dlpreopen' are ignored without \`-export-dynamic'" 1>&2 - else - # We keep going just in case the user didn't refer to - # dld_preloaded_symbols. The linker will fail if global_symbol_pipe - # really was required. - $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2 + # Now compile the dynamic symbol file. + $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")" + $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $? - # Nullify the symbol file. - compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` - fi + # Clean up the generated files. + $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T" + $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T" - if test -z "$link_against_libtool_libs" || test "$build_libtool_libs" != yes; then - # Replace the output file specification. 
- compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` - finalize_command=`$echo "X$finalize_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + # Transform the symbol file into the correct name. + compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + ;; + *) + $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2 + exit 1 + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. - # We have no uninstalled library dependencies, so finalize right now. - $show "$compile_command" - $run eval "$compile_command" - exit $? + # Nullify the symbol file. + compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` fi - # Replace the output file specification. - compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$objdir/$output"'%g'` - finalize_command=`$echo "X$finalize_command" | $Xsed -e 's%@OUTPUT@%'"$objdir/$output"'T%g'` + if test $need_relink = no || test "$build_libtool_libs" != yes; then + # Replace the output file specification. + compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + link_command="$compile_command$compile_rpath" - # Create the binary in the object directory, then wrap it. - if test -d $objdir; then : - else - $show "$mkdir $objdir" - $run $mkdir $objdir + # We have no uninstalled library dependencies, so finalize right now. + $show "$link_command" + $run eval "$link_command" status=$? - if test $status -eq 0 || test -d $objdir; then : - else - exit $status + + # Delete the generated files. 
+ if test -n "$dlsyms"; then + $show "$rm $output_objdir/${outputname}S.${objext}" + $run $rm "$output_objdir/${outputname}S.${objext}" fi + + exit $status fi if test -n "$shlibpath_var"; then - # We should set the shlibpath_var - rpath= - for dir in $temp_rpath; do - case "$dir" in - /* | [A-Za-z]:\\*) - # Absolute path. - rpath="$rpath$dir:" - ;; - *) - # Relative path: add a thisdir entry. - rpath="$rpath\$thisdir/$dir:" - ;; - esac - done - temp_rpath="$rpath" - fi - - # Delete the old output file. - $run $rm $output - - if test -n "$compile_shlibpath"; then - compile_command="$shlibpath_var=\"$compile_shlibpath\$$shlibpath_var\" $compile_command" + # We should set the shlibpath_var + rpath= + for dir in $temp_rpath; do + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) + # Absolute path. + rpath="$rpath$dir:" + ;; + *) + # Relative path: add a thisdir entry. + rpath="$rpath\$thisdir/$dir:" + ;; + esac + done + temp_rpath="$rpath" + fi + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" fi if test -n "$finalize_shlibpath"; then - finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + rpath="$rpath$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi fi - if test -n "$runpath_var" && test -n "$perm_rpath"; then - # We should set the runpath_var. 
- rpath= - for dir in $perm_rpath; do - rpath="$rpath$dir:" - done - compile_command="$runpath_var=\"$rpath\$$runpath_var\" $compile_command" - finalize_command="$runpath_var=\"$rpath\$$runpath_var\" $finalize_command" + if test "$no_install" = yes; then + # We don't need to create a wrapper script. + link_command="$compile_var$compile_command$compile_rpath" + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $run $rm $output + # Link the executable and exit + $show "$link_command" + $run eval "$link_command" || exit $? + exit 0 fi - case "$hardcode_action" in - relink) - # AGH! Flame the AIX and HP-UX people for me, will ya? - $echo "$modename: warning: using a buggy system linker" 1>&2 - $echo "$modename: relinking will be required before \`$output' can be installed" 1>&2 - ;; - esac + if test "$hardcode_action" = relink; then + # Fast installation is not supported + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + + $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2 + $echo "$modename: \`$output' will be relinked during installation" 1>&2 + else + if test "$fast_install" != no; then + link_command="$finalize_var$compile_command$finalize_rpath" + if test "$fast_install" = yes; then + relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'` + else + # fast_install is set to needless + relink_command= + fi + else + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + fi + fi + + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` - $show "$compile_command" - $run eval "$compile_command" || exit $? + # Delete the old output files. 
+ $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname + + $show "$link_command" + $run eval "$link_command" || exit $? # Now create the wrapper script. $show "creating $output" - # Quote the finalize command for shipping. - finalize_command=`$echo "X$finalize_command" | $Xsed -e "$sed_quote_subst"` + # Quote the relink command for shipping. + if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` + relink_command="$var=\"$var_value\"; export $var; $relink_command" + fi + done + relink_command="(cd `pwd`; $relink_command)" + relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` + fi # Quote $echo for shipping. - qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"` + if test "X$echo" = "X$SHELL $0 --fallback-echo"; then + case $0 in + [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";; + *) qecho="$SHELL `pwd`/$0 --fallback-echo";; + esac + qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"` + else + qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"` + fi # Only actually do things if our run command is non-null. if test -z "$run"; then - $rm $output - trap "$rm $output; exit 1" 1 2 15 + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) output=`echo $output|${SED} 's,.exe$,,'` ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) exeext=.exe ;; + *) exeext= ;; + esac + $rm $output + trap "$rm $output; exit 1" 1 2 15 - $echo > $output "\ -#! /bin/sh + $echo > $output "\ +#! 
$SHELL -# $output - temporary wrapper script for $objdir/$output -# Generated by ltmain.sh - GNU $PACKAGE $VERSION +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP # # The $output program cannot be directly executed until all the libtool # libraries that it depends on are installed. # -# This wrapper script should never be moved out of \``pwd`'. +# This wrapper script should never be moved out of the build directory. # If it is, it will not operate correctly. # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. -Xsed='sed -e s/^X//' +Xsed="${SED}"' -e 1s/^X//' sed_quote_subst='$sed_quote_subst' # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. -if test \"\${CDPATH+set}\" = set; then CDPATH=; export CDPATH; fi +if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi + +relink_command=\"$relink_command\" # This environment variable determines our operation mode. if test \"\$libtool_install_magic\" = \"$magic\"; then - # install mode needs the following variables: - link_against_libtool_libs='$link_against_libtool_libs' - finalize_command=\"$finalize_command\" + # install mode needs the following variable: + notinst_deplibs='$notinst_deplibs' else # When we are sourced in execute mode, \$file and \$echo are already set. - if test \"\$libtool_execute_magic\" = \"$magic\"; then : - else + if test \"\$libtool_execute_magic\" != \"$magic\"; then echo=\"$qecho\" file=\"\$0\" + # Make sure echo works. + if test \"X\$1\" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift + elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then + # Yippee, \$echo works! + : + else + # Restart under the correct shell, and then maybe \$echo will work. 
+ exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"} + fi fi\ " - $echo >> $output "\ + $echo >> $output "\ # Find the directory that this script lives in. thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\` test \"x\$thisdir\" = \"x\$file\" && thisdir=. # Follow symbolic links until we get to the real thisdir. - file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\` + file=\`ls -ld \"\$file\" | ${SED} -n 's/.*-> //p'\` while test -n \"\$file\"; do destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\` # If there was a directory component, then change thisdir. if test \"x\$destdir\" != \"x\$file\"; then case \"\$destdir\" in - /* | [A-Za-z]:\\*) thisdir=\"\$destdir\" ;; + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; *) thisdir=\"\$thisdir/\$destdir\" ;; esac fi file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\` - file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\` + file=\`ls -ld \"\$thisdir/\$file\" | ${SED} -n 's/.*-> //p'\` done # Try to get the absolute directory name. absdir=\`cd \"\$thisdir\" && pwd\` test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test "$fast_install" = yes; then + echo >> $output "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || \\ + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + if test ! 
-d \"\$progdir\"; then + $mkdir \"\$progdir\" + else + $rm \"\$progdir/\$file\" + fi" + + echo >> $output "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + $echo \"\$relink_command_output\" >&2 + $rm \"\$progdir/\$file\" + exit 1 + fi + fi + + $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $rm \"\$progdir/\$program\"; + $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $rm \"\$progdir/\$file\" + fi" + else + echo >> $output "\ + program='$outputname' progdir=\"\$thisdir/$objdir\" - program='$output' +" + fi + + echo >> $output "\ if test -f \"\$progdir/\$program\"; then" - # Export our shlibpath_var if we have one. - if test -n "$shlibpath_var" && test -n "$temp_rpath"; then - $echo >> $output "\ + # Export our shlibpath_var if we have one. + if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $echo >> $output "\ # Add our own library path to $shlibpath_var $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" # Some systems cannot cope with colon-terminated $shlibpath_var - $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/:*\$//'\` + # The second colon is a workaround for a bug in BeOS R4 ${SED} + $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\` export $shlibpath_var " - fi + fi + + # fixup the dll searchpath if we need to. + if test -n "$dllsearchpath"; then + $echo >> $output "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi - $echo >> $output "\ + $echo >> $output "\ if test \"\$libtool_execute_magic\" != \"$magic\"; then # Run the actual program with our arguments. 
+" + case $host in + # win32 systems need to use the prog path for dll + # lookup to work + *-*-cygwin* | *-*-pw32*) + $echo >> $output "\ + exec \$progdir/\$program \${1+\"\$@\"} +" + ;; + + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2*) + $echo >> $output "\ + exec \$progdir\\\\\$program \${1+\"\$@\"} +" + ;; + *) + $echo >> $output "\ # Export the path to the program. PATH=\"\$progdir:\$PATH\" export PATH exec \$program \${1+\"\$@\"} - +" + ;; + esac + $echo >> $output "\ \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\" exit 1 fi @@ -1530,48 +3814,189 @@ else fi fi\ " - chmod +x $output + chmod +x $output fi exit 0 ;; esac # See if we need to build an old-fashioned archive. - if test "$build_old_libs" = "yes"; then - # Transform .lo files to .o files. - oldobjs="$objs"`$echo "X$libobjs " | $Xsed -e 's/[^ ]*\.a //g' -e 's/\.lo /.o /g' -e 's/ $//g'` + for oldlib in $oldlibs; do + + if test "$build_libtool_libs" = convenience; then + oldobjs="$libobjs_save" + addlibs="$convenience" + build_libtool_libs=no + else + if test "$build_libtool_libs" = module; then + oldobjs="$libobjs_save" + build_libtool_libs=no + else + oldobjs="$objs$old_deplibs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP` + fi + addlibs="$old_convenience" + fi + + if test -n "$addlibs"; then + gentop="$output_objdir/${outputname}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + # Add in members from convenience archives. + for xlib in $addlibs; do + # Extract the objects. + case $xlib in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? 
+ if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP` + done + fi # Do each command in the archive commands. if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then eval cmds=\"$old_archive_from_new_cmds\" else + # Ensure that we have .o objects in place in case we decided + # not to build a shared library, and have fallen back to building + # static libs even though --disable-static was passed! + for oldobj in $oldobjs; do + if test ! -f $oldobj; then + xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$oldobj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'` + obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` + $show "(cd $xdir && ${LN_S} $obj $baseobj)" + $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $? + fi + done + eval cmds=\"$old_archive_cmds\" fi - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' + save_ifs="$IFS"; IFS='~' for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? done IFS="$save_ifs" + done + + if test -n "$generated"; then + $show "${rm}r$generated" + $run ${rm}r$generated fi # Now create the libtool archive. 
- case "$output" in + case $output in *.la) old_library= - test "$build_old_libs" = yes && old_library="$libname.a" - + test "$build_old_libs" = yes && old_library="$libname.$libext" $show "creating $output" + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` + relink_command="$var=\"$var_value\"; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + relink_command="(cd `pwd`; $SHELL $0 --mode=relink $libtool_args)" + relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` + # Only create the output if not a dry run. if test -z "$run"; then - $echo > $output "\ -# $output - a libtool library file -# Generated by ltmain.sh - GNU $PACKAGE $VERSION + for installed in no yes; do + if test "$installed" = yes; then + if test -z "$install_libdir"; then + break + fi + output="$output_objdir/$outputname"i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'` + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdependency_libs="$newdependency_libs $libdir/$name" + ;; + *) newdependency_libs="$newdependency_libs $deplib" ;; + esac + done + dependency_libs="$newdependency_libs" + newdlfiles= + for lib in $dlfiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a 
valid libtool archive" 1>&2 + exit 1 + fi + newdlfiles="$newdlfiles $libdir/$name" + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdlprefiles="$newdlprefiles $libdir/$name" + done + dlprefiles="$newdlprefiles" + fi + $rm $output + # place dlname in correct position for cygwin + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;; + esac + $echo > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP +# +# Please DO NOT delete this file! +# It is necessary for linking the library. # The name that we can dlopen(3). -dlname='$dlname' +dlname='$tdlname' # Names of this library. library_names='$library_names' @@ -1587,15 +4012,26 @@ current=$current age=$age revision=$revision +# Is this an already installed library? +installed=$installed + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + # Directory that this library needs to be installed in: -libdir='$install_libdir'\ -" +libdir='$install_libdir'" + if test "$installed" = no && test $need_relink = yes; then + $echo >> $output "\ +relink_command=\"$relink_command\"" + fi + done fi # Do a symbolic link so that the libtool archive can be found in # LD_LIBRARY_PATH before the program is installed. - $show "(cd $objdir && $LN_S ../$output $output)" - $run eval "(cd $objdir && $LN_S ../$output $output)" || exit 1 + $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" + $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $? 
;; esac exit 0 @@ -1605,12 +4041,14 @@ libdir='$install_libdir'\ install) modename="$modename: install" - # There may be an optional /bin/sh argument at the beginning of + # There may be an optional sh(1) argument at the beginning of # install_prog (especially on Windows NT). - if test "$nonopt" = "$SHELL"; then + if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || + # Allow the use of GNU shtool's install command. + $echo "X$nonopt" | $Xsed | grep shtool > /dev/null; then # Aesthetically quote it. arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"` - case "$arg" in + case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) arg="\"$arg\"" ;; @@ -1626,7 +4064,7 @@ libdir='$install_libdir'\ # The real first argument should be the name of the installation program. # Aesthetically quote it. arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in + case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) arg="\"$arg\"" ;; @@ -1639,42 +4077,42 @@ libdir='$install_libdir'\ opts= prev= install_type= - isdir= + isdir=no stripme= for arg do if test -n "$dest"; then - files="$files $dest" - dest="$arg" - continue + files="$files $dest" + dest="$arg" + continue fi - case "$arg" in + case $arg in -d) isdir=yes ;; -f) prev="-f" ;; -g) prev="-g" ;; -m) prev="-m" ;; -o) prev="-o" ;; -s) - stripme=" -s" - continue - ;; + stripme=" -s" + continue + ;; -*) ;; *) - # If the previous option needed an argument, then skip it. - if test -n "$prev"; then - prev= - else - dest="$arg" - continue - fi - ;; + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + prev= + else + dest="$arg" + continue + fi + ;; esac # Aesthetically quote the argument. 
arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in + case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) arg="\"$arg\"" ;; @@ -1696,9 +4134,9 @@ libdir='$install_libdir'\ if test -z "$files"; then if test -z "$dest"; then - $echo "$modename: no file or destination specified" 1>&2 + $echo "$modename: no file or destination specified" 1>&2 else - $echo "$modename: you must specify a destination" 1>&2 + $echo "$modename: you must specify a destination" 1>&2 fi $echo "$help" 1>&2 exit 1 @@ -1709,7 +4147,7 @@ libdir='$install_libdir'\ # Check to see that the destination is a directory. test -d "$dest" && isdir=yes - if test -n "$isdir"; then + if test "$isdir" = yes; then destdir="$dest" destname= else @@ -1720,23 +4158,23 @@ libdir='$install_libdir'\ # Not a directory, so check to see that there is only one file specified. set dummy $files if test $# -gt 2; then - $echo "$modename: \`$dest' is not a directory" 1>&2 - $echo "$help" 1>&2 - exit 1 + $echo "$modename: \`$dest' is not a directory" 1>&2 + $echo "$help" 1>&2 + exit 1 fi fi - case "$destdir" in - /* | [A-Za-z]:\\*) ;; + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; *) for file in $files; do - case "$file" in - *.lo) ;; - *) - $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - esac + case $file in + *.lo) ;; + *) + $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + esac done ;; esac @@ -1751,210 +4189,266 @@ libdir='$install_libdir'\ for file in $files; do # Do each installation. - case "$file" in - *.a) - # Do the static libraries later. - staticlibs="$staticlibs $file" - ;; + case $file in + *.$libext) + # Do the static libraries later. + staticlibs="$staticlibs $file" + ;; *.la) - # Check to see that this really is a libtool archive. 
- if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then : - else - $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - library_names= - old_library= - # If there is no directory component, then add one. - case "$file" in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Add the libdir to current_libdirs if it is the destination. - if test "X$destdir" = "X$libdir"; then - case "$current_libdirs " in - *" $libdir "*) ;; - *) current_libdirs="$current_libdirs $libdir" ;; - esac - else - # Note the libdir as a future libdir. - case "$future_libdirs " in - *" $libdir "*) ;; - *) future_libdirs="$future_libdirs $libdir" ;; - esac - fi - - dir="`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/" - test "X$dir" = "X$file/" && dir= - dir="$dir$objdir" - - # See the names of the shared library. - set dummy $library_names - if test -n "$2"; then - realname="$2" - shift - shift - - # Install the shared library and build the symlinks. - $show "$install_prog $dir/$realname $destdir/$realname" - $run eval "$install_prog $dir/$realname $destdir/$realname" || exit $? - test "X$dlname" = "X$realname" && dlname= - - if test $# -gt 0; then - # Delete the old symlinks. - rmcmd="$rm" - for linkname - do - rmcmd="$rmcmd $destdir/$linkname" - done - $show "$rmcmd" - $run $rmcmd - - # ... and create new ones. - for linkname - do - test "X$dlname" = "X$linkname" && dlname= - $show "(cd $destdir && $LN_S $realname $linkname)" - $run eval "(cd $destdir && $LN_S $realname $linkname)" - done - fi - - if test -n "$dlname"; then - # Install the dynamically-loadable library. - $show "$install_prog $dir/$dlname $destdir/$dlname" - $run eval "$install_prog $dir/$dlname $destdir/$dlname" || exit $? - fi - - # Do each command in the postinstall commands. 
- lib="$destdir/$realname" - eval cmds=\"$postinstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - fi - - # Install the pseudo-library for information purposes. - name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - $show "$install_prog $file $destdir/$name" - $run eval "$install_prog $file $destdir/$name" || exit $? - - # Maybe install the static library, too. - test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" - ;; + # Check to see that this really is a libtool archive. + if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + library_names= + old_library= + relink_command= + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) current_libdirs="$current_libdirs $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) future_libdirs="$future_libdirs $libdir" ;; + esac + fi + + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/ + test "X$dir" = "X$file/" && dir= + dir="$dir$objdir" + + if test -n "$relink_command"; then + $echo "$modename: warning: relinking \`$file'" 1>&2 + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + continue + fi + fi + + # See the names of the shared library. + set dummy $library_names + if test -n "$2"; then + realname="$2" + shift + shift + + srcname="$realname" + test -n "$relink_command" && srcname="$realname"T + + # Install the shared library and build the symlinks. 
+ $show "$install_prog $dir/$srcname $destdir/$realname" + $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $? + if test -n "$stripme" && test -n "$striplib"; then + $show "$striplib $destdir/$realname" + $run eval "$striplib $destdir/$realname" || exit $? + fi + + if test $# -gt 0; then + # Delete the old symlinks, and create new ones. + for linkname + do + if test "$linkname" != "$realname"; then + $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" + $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" + fi + done + fi + + # Do each command in the postinstall commands. + lib="$destdir/$realname" + eval cmds=\"$postinstall_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + fi + + # Install the pseudo-library for information purposes. + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + instname="$dir/$name"i + $show "$install_prog $instname $destdir/$name" + $run eval "$install_prog $instname $destdir/$name" || exit $? + + # Maybe install the static library, too. + test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" + ;; *.lo) - # Install (i.e. copy) a libtool object. - - # Figure out destination file name, if it wasn't already specified. - if test -n "$destname"; then - destfile="$destdir/$destname" - else - destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - destfile="$destdir/$destfile" - fi - - # Deduce the name of the destination old-style object file. - case "$destfile" in - *.lo) - staticdest=`$echo "X$destfile" | $Xsed -e 's/\.lo$/\.o/'` - ;; - *.o) - staticdest="$destfile" - destfile= - ;; - *) - $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - esac - - # Install the libtool object if requested. - if test -n "$destfile"; then - $show "$install_prog $file $destfile" - $run eval "$install_prog $file $destfile" || exit $? 
- fi - - # Install the old object if enabled. - if test "$build_old_libs" = yes; then - # Deduce the name of the old-style object file. - staticobj=`$echo "X$file" | $Xsed -e 's/\.lo$/\.o/'` - - $show "$install_prog $staticobj $staticdest" - $run eval "$install_prog \$staticobj \$staticdest" || exit $? - fi - exit 0 - ;; + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # Deduce the name of the destination old-style object file. + case $destfile in + *.lo) + staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"` + ;; + *.$objext) + staticdest="$destfile" + destfile= + ;; + *) + $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + esac + + # Install the libtool object if requested. + if test -n "$destfile"; then + $show "$install_prog $file $destfile" + $run eval "$install_prog $file $destfile" || exit $? + fi + + # Install the old object if enabled. + if test "$build_old_libs" = yes; then + # Deduce the name of the old-style object file. + staticobj=`$echo "X$file" | $Xsed -e "$lo2o"` + + $show "$install_prog $staticobj $staticdest" + $run eval "$install_prog \$staticobj \$staticdest" || exit $? + fi + exit 0 + ;; *) - # Do a test to see if this is really a libtool program. - if (sed -e '4q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then - link_against_libtool_libs= - finalize_command= - - # If there is no directory component, then add one. - case "$file" in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Check the variables that should have been set. 
- if test -z "$link_against_libtool_libs" || test -z "$finalize_command"; then - $echo "$modename: invalid libtool wrapper script \`$file'" 1>&2 - exit 1 - fi - - finalize=yes - for lib in $link_against_libtool_libs; do - # Check to see that each library is installed. - libdir= - if test -f "$lib"; then - # If there is no directory component, then add one. - case "$lib" in - */* | *\\*) . $lib ;; - *) . ./$lib ;; - esac - fi - libfile="$libdir/`$echo "X$lib" | $Xsed -e 's%^.*/%%g'`" - if test -z "$libdir"; then - $echo "$modename: warning: \`$lib' contains no -rpath information" 1>&2 - elif test -f "$libfile"; then : - else - $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 - finalize=no - fi - done - - if test "$hardcode_action" = relink; then - if test "$finalize" = yes; then - $echo "$modename: warning: relinking \`$file' on behalf of your buggy system linker" 1>&2 - $show "$finalize_command" - if $run eval "$finalize_command"; then : - else - $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 - continue - fi - file="$objdir/$file"T - else - $echo "$modename: warning: cannot relink \`$file' on behalf of your buggy system linker" 1>&2 - fi - else - # Install the binary that we compiled earlier. + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin*|*mingw*) + wrapper=`echo $file | ${SED} -e 's,.exe$,,'` + ;; + *) + wrapper=$file + ;; + esac + if (${SED} -e '4q' $wrapper | egrep "^# Generated by .*$PACKAGE")>/dev/null 2>&1; then + notinst_deplibs= + relink_command= + + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $wrapper ;; + *) . ./$wrapper ;; + esac + + # Check the variables that should have been set. 
+ if test -z "$notinst_deplibs"; then + $echo "$modename: invalid libtool wrapper script \`$wrapper'" 1>&2 + exit 1 + fi + + finalize=yes + for lib in $notinst_deplibs; do + # Check to see that each library is installed. + libdir= + if test -f "$lib"; then + # If there is no directory component, then add one. + case $lib in + */* | *\\*) . $lib ;; + *) . ./$lib ;; + esac + fi + libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test + if test -n "$libdir" && test ! -f "$libfile"; then + $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 + finalize=no + fi + done + + relink_command= + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $wrapper ;; + *) . ./$wrapper ;; + esac + + outputname= + if test "$fast_install" = no && test -n "$relink_command"; then + if test "$finalize" = yes && test -z "$run"; then + tmpdir="/tmp" + test -n "$TMPDIR" && tmpdir="$TMPDIR" + tmpdir="$tmpdir/libtool-$$" + if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then : + else + $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2 + continue + fi + file=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + outputname="$tmpdir/$file" + # Replace the output file specification. + relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'` + + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + ${rm}r "$tmpdir" + continue + fi + file="$outputname" + else + $echo "$modename: warning: cannot relink \`$file'" 1>&2 + fi + else + # Install the binary that we compiled earlier. file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"` - fi - fi + fi + fi - $show "$install_prog$stripme $file $dest" - $run eval "$install_prog\$stripme \$file \$dest" || exit $? 
- ;; + # remove .exe since cygwin /usr/bin/install will append another + # one anyways + case $install_prog,$host in + /usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + destfile=`echo $destfile | ${SED} -e 's,.exe$,,'` + ;; + esac + ;; + esac + $show "$install_prog$stripme $file $destfile" + $run eval "$install_prog\$stripme \$file \$destfile" || exit $? + test -n "$outputname" && ${rm}r "$tmpdir" + ;; esac done @@ -1967,13 +4461,18 @@ libdir='$install_libdir'\ $show "$install_prog $file $oldlib" $run eval "$install_prog \$file \$oldlib" || exit $? + if test -n "$stripme" && test -n "$striplib"; then + $show "$old_striplib $oldlib" + $run eval "$old_striplib $oldlib" || exit $? + fi + # Do each command in the postinstall commands. eval cmds=\"$old_postinstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' + save_ifs="$IFS"; IFS='~' for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? done IFS="$save_ifs" done @@ -1985,54 +4484,59 @@ libdir='$install_libdir'\ if test -n "$current_libdirs"; then # Maybe just do a dry run. test -n "$run" && current_libdirs=" -n$current_libdirs" - exec $SHELL $0 --finish$current_libdirs - exit 1 + exec_cmd='$SHELL $0 --finish$current_libdirs' + else + exit 0 fi - - exit 0 ;; # libtool finish mode finish) modename="$modename: finish" libdirs="$nonopt" + admincmds= if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then for dir do - libdirs="$libdirs $dir" + libdirs="$libdirs $dir" done for libdir in $libdirs; do if test -n "$finish_cmds"; then # Do each command in the finish commands. 
eval cmds=\"$finish_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" - done - IFS="$save_ifs" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || admincmds="$admincmds + $cmd" + done + IFS="$save_ifs" fi if test -n "$finish_eval"; then # Do the single finish_eval. eval cmds=\"$finish_eval\" - $run eval "$cmds" + $run eval "$cmds" || admincmds="$admincmds + $cmds" fi done fi - echo "------------------------------------------------------------------------------" + # Exit here if they wanted silent mode. + test "$show" = ":" && exit 0 + + echo "----------------------------------------------------------------------" echo "Libraries have been installed in:" for libdir in $libdirs; do echo " $libdir" done echo - echo "To link against installed libraries in a given directory, LIBDIR," - echo "you must use the \`-LLIBDIR' flag during linking." - echo - echo " You will also need to do one of the following:" + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the \`-LLIBDIR'" + echo "flag during linking and do at least one of the following:" if test -n "$shlibpath_var"; then echo " - add LIBDIR to the \`$shlibpath_var' environment variable" echo " during execution" @@ -2047,13 +4551,16 @@ libdir='$install_libdir'\ echo " - use the \`$flag' linker flag" fi + if test -n "$admincmds"; then + echo " - have your system administrator run these commands:$admincmds" + fi if test -f /etc/ld.so.conf; then echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" fi echo echo "See any operating system documentation about shared libraries for" echo "more information, such as the ld(1) and ld.so(8) manual pages." 
- echo "------------------------------------------------------------------------------" + echo "----------------------------------------------------------------------" exit 0 ;; @@ -2071,32 +4578,31 @@ libdir='$install_libdir'\ # Handle -dlopen flags immediately. for file in $execute_dlfiles; do - if test -f "$file"; then : - else + if test ! -f "$file"; then $echo "$modename: \`$file' is not a file" 1>&2 $echo "$help" 1>&2 exit 1 fi dir= - case "$file" in + case $file in *.la) - # Check to see that this really is a libtool archive. - if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then : - else - $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi + # Check to see that this really is a libtool archive. + if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi # Read the libtool library. dlname= library_names= - # If there is no directory component, then add one. - case "$file" in + # If there is no directory component, then add one. + case $file in */* | *\\*) . $file ;; - *) . ./$file ;; + *) . ./$file ;; esac # Skip this library if it cannot be dlopened. @@ -2125,7 +4631,7 @@ libdir='$install_libdir'\ *) $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2 - continue + continue ;; esac @@ -2149,13 +4655,13 @@ libdir='$install_libdir'\ args= for file do - case "$file" in + case $file in -*) ;; *) - # Do a test to see if this is really a libtool program. - if (sed -e '4q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then + # Do a test to see if this is really a libtool program. + if (${SED} -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then # If there is no directory component, then add one. - case "$file" in + case $file in */* | *\\*) . $file ;; *) . 
./$file ;; esac @@ -2163,7 +4669,7 @@ libdir='$install_libdir'\ # Transform arg to wrapped name. file="$progdir/$program" fi - ;; + ;; esac # Quote arguments (to preserve shell metacharacters). file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"` @@ -2171,32 +4677,48 @@ libdir='$install_libdir'\ done if test -z "$run"; then - # Export the shlibpath_var. - eval "export $shlibpath_var" + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi - # Now actually exec the command. - eval "exec \$cmd$args" + # Restore saved enviroment variables + if test "${save_LC_ALL+set}" = set; then + LC_ALL="$save_LC_ALL"; export LC_ALL + fi + if test "${save_LANG+set}" = set; then + LANG="$save_LANG"; export LANG + fi - $echo "$modename: cannot exec \$cmd$args" - exit 1 + # Now prepare to actually exec the command. + exec_cmd="\$cmd$args" else # Display what would be done. - eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" - $echo "export $shlibpath_var" + if test -n "$shlibpath_var"; then + eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" + $echo "export $shlibpath_var" + fi $echo "$cmd$args" exit 0 fi ;; - # libtool uninstall mode - uninstall) - modename="$modename: uninstall" + # libtool clean and uninstall mode + clean | uninstall) + modename="$modename: $mode" rm="$nonopt" files= + rmforce= + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" for arg do - case "$arg" in + case $arg in + -f) rm="$rm $arg"; rmforce=yes ;; -*) rm="$rm $arg" ;; *) files="$files $arg" ;; esac @@ -2208,74 +4730,123 @@ libdir='$install_libdir'\ exit 1 fi + rmdirs= + for file in $files; do dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - test "X$dir" = "X$file" && dir=. + if test "X$dir" = "X$file"; then + dir=. 
+ objdir="$objdir" + else + objdir="$dir/$objdir" + fi name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + test $mode = uninstall && objdir="$dir" + + # Remember objdir for removal later, being careful to avoid duplicates + if test $mode = clean; then + case " $rmdirs " in + *" $objdir "*) ;; + *) rmdirs="$rmdirs $objdir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if (test -L "$file") >/dev/null 2>&1 \ + || (test -h "$file") >/dev/null 2>&1 \ + || test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif test "$rmforce" = yes; then + continue + fi rmfiles="$file" - case "$name" in + case $name in *.la) - # Possibly a libtool archive, so verify it. - if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then - . $dir/$name - - # Delete the libtool libraries and symlinks. - for n in $library_names; do - rmfiles="$rmfiles $dir/$n" - test "X$n" = "X$dlname" && dlname= - done - test -n "$dlname" && rmfiles="$rmfiles $dir/$dlname" - test -n "$old_library" && rmfiles="$rmfiles $dir/$old_library" - - $show "$rm $rmfiles" - $run $rm $rmfiles - - if test -n "$library_names"; then - # Do each command in the postuninstall commands. - eval cmds=\"$postuninstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' - for cmd in $cmds; do + # Possibly a libtool archive, so verify it. + if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + . $dir/$name + + # Delete the libtool libraries and symlinks. + for n in $library_names; do + rmfiles="$rmfiles $objdir/$n" + done + test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library" + test $mode = clean && rmfiles="$rmfiles $objdir/$name $objdir/${name}i" + + if test $mode = uninstall; then + if test -n "$library_names"; then + # Do each command in the postuninstall commands. 
+ eval cmds=\"$postuninstall_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" + if test $? != 0 && test "$rmforce" != yes; then + exit_status=1 + fi + done IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" - done - IFS="$save_ifs" - fi + fi - if test -n "$old_library"; then - # Do each command in the old_postuninstall commands. - eval cmds=\"$old_postuninstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS=';' - for cmd in $cmds; do + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + eval cmds=\"$old_postuninstall_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" + if test $? != 0 && test "$rmforce" != yes; then + exit_status=1 + fi + done IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" - done - IFS="$save_ifs" + fi + # FIXME: should reinstall the best remaining shared library. fi - - # FIXME: should reinstall the best remaining shared library. - fi - ;; + fi + ;; *.lo) - if test "$build_old_libs" = yes; then - oldobj=`$echo "X$name" | $Xsed -e 's/\.lo$/\.o/'` - rmfiles="$rmfiles $dir/$oldobj" - fi - $show "$rm $rmfiles" - $run $rm $rmfiles - ;; + if test "$build_old_libs" = yes; then + oldobj=`$echo "X$name" | $Xsed -e "$lo2o"` + rmfiles="$rmfiles $dir/$oldobj" + fi + ;; *) - $show "$rm $rmfiles" - $run $rm $rmfiles + # Do a test to see if this is a libtool program. + if test $mode = clean && + (${SED} -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + relink_command= + . 
$dir/$file + + rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}" + if test "$fast_install" = yes && test -n "$relink_command"; then + rmfiles="$rmfiles $objdir/lt-$name" + fi + fi ;; esac + $show "$rm $rmfiles" + $run $rm $rmfiles || exit_status=1 done - exit 0 + + # Try to remove the ${objdir}s in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + $show "rmdir $dir" + $run rmdir $dir >/dev/null 2>&1 + fi + done + + exit $exit_status ;; "") @@ -2285,20 +4856,29 @@ libdir='$install_libdir'\ ;; esac - $echo "$modename: invalid operation mode \`$mode'" 1>&2 - $echo "$generic_help" 1>&2 - exit 1 + if test -z "$exec_cmd"; then + $echo "$modename: invalid operation mode \`$mode'" 1>&2 + $echo "$generic_help" 1>&2 + exit 1 + fi fi # test -z "$show_help" +if test -n "$exec_cmd"; then + eval exec $exec_cmd + exit 1 +fi + # We need to display help for each of the modes. -case "$mode" in +case $mode in "") $echo \ "Usage: $modename [OPTION]... [MODE-ARG]... Provide generalized library-building support services. + --config show all configuration variables + --debug enable verbose shell tracing -n, --dry-run display commands without modifying any files - --features display configuration information and exit + --features display basic configuration information and exit --finish same as \`--mode=finish' --help display this help message and exit --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS] @@ -2308,6 +4888,7 @@ Provide generalized library-building support services. MODE must be one of the following: + clean remove files from the build directory compile compile a source file into a libtool object execute automatically set library path, then run a program finish complete the installation of libtool libraries @@ -2320,12 +4901,33 @@ a more detailed description of MODE." exit 0 ;; +clean) + $echo \ +"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. 
+ +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + compile) $echo \ "Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE Compile a source file into a libtool library object. +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -prefer-pic try to building PIC objects only + -prefer-non-pic try to building non-PIC objects only + -static always build a \`.o' file suitable for static linking + COMPILE-COMMAND is a command to be used in creating a \`standard' object file from the given SOURCEFILE. @@ -2392,18 +4994,27 @@ a program from several object files. The following components of LINK-COMMAND are treated specially: -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime - -dlpreopen FILE link in FILE and add its symbols to dld_preloaded_symbols + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX -LLIBDIR search LIBDIR for required installed libraries -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable -no-undefined declare that a library does not refer to external symbols -o OUTPUT-FILE create OUTPUT-FILE from the specified objects -release RELEASE specify package release information -rpath LIBDIR the 
created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries -static do not do any dynamic linking of libtool libraries -version-info CURRENT[:REVISION[:AGE]] - specify library version info [each variable defaults to 0] + specify library version info [each variable defaults to 0] All other options (arguments beginning with \`-') are ignored. @@ -2411,18 +5022,19 @@ Every other argument is treated as a filename. Files ending in \`.la' are treated as uninstalled libtool libraries, other files are standard or library object files. -If the OUTPUT-FILE ends in \`.la', then a libtool library is created, only -library objects (\`.lo' files) may be specified, and \`-rpath' is required. +If the OUTPUT-FILE ends in \`.la', then a libtool library is created, +only library objects (\`.lo' files) may be specified, and \`-rpath' is +required, except when creating a convenience library. -If OUTPUT-FILE ends in \`.a', then a standard library is created using \`ar' -and \`ranlib'. +If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created +using \`ar' and \`ranlib', or on Windows using \`lib'. -If OUTPUT-FILE ends in \`.lo' or \`.o', then a reloadable object file is -created, otherwise an executable program is created." +If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file +is created, otherwise an executable program is created." ;; uninstall) - $echo + $echo \ "Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... Remove libraries from an installation directory. diff --git a/makecfg.c b/makecfg.c new file mode 100644 index 0000000..787ea37 --- /dev/null +++ b/makecfg.c @@ -0,0 +1,300 @@ +/* + * makecfg.c + * + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. 
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc + * Last Modified : March 23, 2005 + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + +#ifndef offsetof /* defined in */ +#define offsetof(type, mem) ((size_t) \ + ((char *)&((type *)0)->mem - (char *)(type *)0)) +#endif + +void +print_structure_offset (void) +{ + printf("\n"); + printf("; ---- macros for structure access -----------------------------------------\n"); + printf("\n"); + + printf("; struct jpeg_compress_struct\n\n"); + printf("%%define jcstruct_image_width(b) ((b) + %3u) ; cinfo->image_width\n", + (unsigned)offsetof(struct jpeg_compress_struct, image_width)); + printf("%%define jcstruct_max_v_samp_factor(b) ((b) + %3u) ; cinfo->max_v_samp_factor\n", + (unsigned)offsetof(struct jpeg_compress_struct, max_v_samp_factor)); + printf("\n"); + + printf("; struct jpeg_decompress_struct\n\n"); + printf("%%define jdstruct_output_width(b) ((b) + %3u) ; cinfo->output_width\n", + (unsigned)offsetof(struct jpeg_decompress_struct, output_width)); + printf("%%define jdstruct_max_v_samp_factor(b) ((b) + %3u) ; cinfo->max_v_samp_factor\n", + (unsigned)offsetof(struct jpeg_decompress_struct, max_v_samp_factor)); + printf("%%define jdstruct_sample_range_limit(b) ((b) + %3u) ; cinfo->sample_range_limit\n", + (unsigned)offsetof(struct jpeg_decompress_struct, sample_range_limit)); + printf("\n"); + + printf("; jpeg_component_info\n\n"); + printf("%%define jcompinfo_v_samp_factor(b) ((b) + %2u) ; compptr->v_samp_factor\n", + (unsigned)offsetof(jpeg_component_info, v_samp_factor)); + printf("%%define jcompinfo_width_in_blocks(b) ((b) + %2u) ; compptr->width_in_blocks\n", + (unsigned)offsetof(jpeg_component_info, width_in_blocks)); + printf("%%define jcompinfo_downsampled_width(b) ((b) + %2u) ; compptr->downsampled_width\n", + (unsigned)offsetof(jpeg_component_info, downsampled_width)); + printf("%%define jcompinfo_dct_table(b) ((b) + %2u) ; compptr->dct_table\n", + 
(unsigned)offsetof(jpeg_component_info, dct_table)); + printf("\n"); +} + + +void +print_jconfig_h_macro (void) +{ + printf("\n"); + printf("; ---- macros from jconfig.h -----------------------------------------------\n"); + printf("\n"); + +#ifdef NEED_SHORT_EXTERNAL_NAMES + printf("%%define NEED_SHORT_EXTERNAL_NAMES\t; Use short forms of external names\n"); +#else + printf("%%undef NEED_SHORT_EXTERNAL_NAMES\t; Use short forms of external names\n"); +#endif + printf("\n"); +} + + +void +print_jmorecfg_h_macro (void) +{ + printf("\n"); + printf("; ---- macros from jmorecfg.h ----------------------------------------------\n"); + printf("\n"); + + printf("; Capability options common to encoder and decoder:\n"); + printf("\n"); +#ifdef DCT_ISLOW_SUPPORTED + printf("%%define DCT_ISLOW_SUPPORTED\t; slow but accurate integer algorithm\n"); +#else + printf("%%undef DCT_ISLOW_SUPPORTED\t; slow but accurate integer algorithm\n"); +#endif +#ifdef DCT_IFAST_SUPPORTED + printf("%%define DCT_IFAST_SUPPORTED\t; faster, less accurate integer method\n"); +#else + printf("%%undef DCT_IFAST_SUPPORTED\t; faster, less accurate integer method\n"); +#endif +#ifdef DCT_FLOAT_SUPPORTED + printf("%%define DCT_FLOAT_SUPPORTED\t; floating-point: accurate, fast on fast HW\n"); +#else + printf("%%undef DCT_FLOAT_SUPPORTED\t; floating-point: accurate, fast on fast HW\n"); +#endif + printf("\n"); + + printf("; Decoder capability options:\n"); + printf("\n"); +#ifdef IDCT_SCALING_SUPPORTED + printf("%%define IDCT_SCALING_SUPPORTED\t\t; Output rescaling via IDCT?\n"); +#else + printf("%%undef IDCT_SCALING_SUPPORTED\t\t; Output rescaling via IDCT?\n"); +#endif +#ifdef UPSAMPLE_MERGING_SUPPORTED + printf("%%define UPSAMPLE_MERGING_SUPPORTED\t; Fast path for sloppy upsampling?\n"); +#else + printf("%%undef UPSAMPLE_MERGING_SUPPORTED\t; Fast path for sloppy upsampling?\n"); +#endif +#ifdef UPSAMPLE_H1V2_SUPPORTED + printf("%%define UPSAMPLE_H1V2_SUPPORTED\t\t; Fast/fancy processing for 1h2v?\n"); 
+#else + printf("%%undef UPSAMPLE_H1V2_SUPPORTED\t\t; Fast/fancy processing for 1h2v?\n"); +#endif + printf("\n"); + +#if (RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4) && \ + (RGB_RED < 0 || RGB_RED >= RGB_PIXELSIZE || RGB_GREEN < 0 || \ + RGB_GREEN >= RGB_PIXELSIZE || RGB_BLUE < 0 || RGB_BLUE >= RGB_PIXELSIZE || \ + RGB_RED == RGB_GREEN || RGB_GREEN == RGB_BLUE || RGB_RED == RGB_BLUE) +#error "Incorrect RGB pixel offset." +#endif + printf("; Ordering of RGB data in scanlines passed to or from the application.\n"); + printf("\n"); + printf("%%define RGB_RED\t\t%u\t; Offset of Red in an RGB scanline element\n", RGB_RED); + printf("%%define RGB_GREEN\t%u\t; Offset of Green\n", RGB_GREEN); + printf("%%define RGB_BLUE\t%u\t; Offset of Blue\n", RGB_BLUE); + printf("%%define RGB_PIXELSIZE\t%u\t; JSAMPLEs per RGB scanline element\n", RGB_PIXELSIZE); + printf("\n"); +#ifdef RGBX_FILLER_0XFF + printf("%%define RGBX_FILLER_0XFF\t; fill dummy bytes with 0xFF in RGBX format\n"); +#else + printf("%%undef RGBX_FILLER_0XFF\t\t; fill dummy bytes with 0xFF in RGBX format\n"); +#endif + printf("\n"); + + printf("; SIMD support options (encoder):\n"); + printf("\n"); +#ifdef JCCOLOR_RGBYCC_MMX_SUPPORTED + printf("%%define JCCOLOR_RGBYCC_MMX_SUPPORTED\t; RGB->YCC conversion with MMX\n"); +#else + printf("%%undef JCCOLOR_RGBYCC_MMX_SUPPORTED\t; RGB->YCC conversion with MMX\n"); +#endif +#ifdef JCCOLOR_RGBYCC_SSE2_SUPPORTED + printf("%%define JCCOLOR_RGBYCC_SSE2_SUPPORTED\t; RGB->YCC conversion with SSE2\n"); +#else + printf("%%undef JCCOLOR_RGBYCC_SSE2_SUPPORTED\t; RGB->YCC conversion with SSE2\n"); +#endif +#ifdef JCSAMPLE_MMX_SUPPORTED + printf("%%define JCSAMPLE_MMX_SUPPORTED\t\t; downsampling with MMX\n"); +#else + printf("%%undef JCSAMPLE_MMX_SUPPORTED\t\t; downsampling with MMX\n"); +#endif +#ifdef JCSAMPLE_SSE2_SUPPORTED + printf("%%define JCSAMPLE_SSE2_SUPPORTED\t\t; downsampling with SSE2\n"); +#else + printf("%%undef JCSAMPLE_SSE2_SUPPORTED\t\t; downsampling with SSE2\n"); 
+#endif +#ifdef JFDCT_INT_MMX_SUPPORTED + printf("%%define JFDCT_INT_MMX_SUPPORTED\t\t; forward DCT with MMX\n"); +#else + printf("%%undef JFDCT_INT_MMX_SUPPORTED\t\t; forward DCT with MMX\n"); +#endif +#ifdef JFDCT_INT_SSE2_SUPPORTED + printf("%%define JFDCT_INT_SSE2_SUPPORTED\t; forward DCT with SSE2\n"); +#else + printf("%%undef JFDCT_INT_SSE2_SUPPORTED\t\t; forward DCT with SSE2\n"); +#endif +#ifdef JFDCT_FLT_3DNOW_MMX_SUPPORTED + printf("%%define JFDCT_FLT_3DNOW_MMX_SUPPORTED\t; forward DCT with 3DNow!/MMX\n"); +#else + printf("%%undef JFDCT_FLT_3DNOW_MMX_SUPPORTED\t; forward DCT with 3DNow!/MMX\n"); +#endif +#ifdef JFDCT_FLT_SSE_MMX_SUPPORTED + printf("%%define JFDCT_FLT_SSE_MMX_SUPPORTED\t; forward DCT with SSE/MMX\n"); +#else + printf("%%undef JFDCT_FLT_SSE_MMX_SUPPORTED\t; forward DCT with SSE/MMX\n"); +#endif +#ifdef JFDCT_FLT_SSE_SSE2_SUPPORTED + printf("%%define JFDCT_FLT_SSE_SSE2_SUPPORTED\t; forward DCT with SSE/SSE2\n"); +#else + printf("%%undef JFDCT_FLT_SSE_SSE2_SUPPORTED\t; forward DCT with SSE/SSE2\n"); +#endif +#ifdef JFDCT_INT_QUANTIZE_WITH_DIVISION + printf("%%define JFDCT_INT_QUANTIZE_WITH_DIVISION ; Use general quantization method\n"); +#else + printf("%%undef JFDCT_INT_QUANTIZE_WITH_DIVISION ; Use general quantization method\n"); +#endif + printf("\n"); + + printf("; SIMD support options (decoder):\n"); + printf("\n"); +#ifdef JDCOLOR_YCCRGB_MMX_SUPPORTED + printf("%%define JDCOLOR_YCCRGB_MMX_SUPPORTED\t; YCC->RGB conversion with MMX\n"); +#else + printf("%%undef JDCOLOR_YCCRGB_MMX_SUPPORTED\t; YCC->RGB conversion with MMX\n"); +#endif +#ifdef JDCOLOR_YCCRGB_SSE2_SUPPORTED + printf("%%define JDCOLOR_YCCRGB_SSE2_SUPPORTED\t; YCC->RGB conversion with SSE2\n"); +#else + printf("%%undef JDCOLOR_YCCRGB_SSE2_SUPPORTED\t; YCC->RGB conversion with SSE2\n"); +#endif +#ifdef JDMERGE_MMX_SUPPORTED + printf("%%define JDMERGE_MMX_SUPPORTED\t\t; merged upsampling with MMX\n"); +#else + printf("%%undef JDMERGE_MMX_SUPPORTED\t\t; merged upsampling with 
MMX\n"); +#endif +#ifdef JDMERGE_SSE2_SUPPORTED + printf("%%define JDMERGE_SSE2_SUPPORTED\t\t; merged upsampling with SSE2\n"); +#else + printf("%%undef JDMERGE_SSE2_SUPPORTED\t\t; merged upsampling with SSE2\n"); +#endif +#ifdef JDSAMPLE_FANCY_MMX_SUPPORTED + printf("%%define JDSAMPLE_FANCY_MMX_SUPPORTED\t; fancy upsampling with MMX\n"); +#else + printf("%%undef JDSAMPLE_FANCY_MMX_SUPPORTED\t; fancy upsampling with MMX\n"); +#endif +#ifdef JDSAMPLE_FANCY_SSE2_SUPPORTED + printf("%%define JDSAMPLE_FANCY_SSE2_SUPPORTED\t; fancy upsampling with SSE2\n"); +#else + printf("%%undef JDSAMPLE_FANCY_SSE2_SUPPORTED\t; fancy upsampling with SSE2\n"); +#endif +#ifdef JDSAMPLE_SIMPLE_MMX_SUPPORTED + printf("%%define JDSAMPLE_SIMPLE_MMX_SUPPORTED\t; sloppy upsampling with MMX\n"); +#else + printf("%%undef JDSAMPLE_SIMPLE_MMX_SUPPORTED\t; sloppy upsampling with MMX\n"); +#endif +#ifdef JDSAMPLE_SIMPLE_SSE2_SUPPORTED + printf("%%define JDSAMPLE_SIMPLE_SSE2_SUPPORTED\t; sloppy upsampling with SSE2\n"); +#else + printf("%%undef JDSAMPLE_SIMPLE_SSE2_SUPPORTED\t; sloppy upsampling with SSE2\n"); +#endif +#ifdef JIDCT_INT_MMX_SUPPORTED + printf("%%define JIDCT_INT_MMX_SUPPORTED\t\t; inverse DCT with MMX\n"); +#else + printf("%%undef JIDCT_INT_MMX_SUPPORTED\t\t; inverse DCT with MMX\n"); +#endif +#ifdef JIDCT_INT_SSE2_SUPPORTED + printf("%%define JIDCT_INT_SSE2_SUPPORTED\t; inverse DCT with SSE2\n"); +#else + printf("%%undef JIDCT_INT_SSE2_SUPPORTED\t\t; inverse DCT with SSE2\n"); +#endif +#ifdef JIDCT_FLT_3DNOW_MMX_SUPPORTED + printf("%%define JIDCT_FLT_3DNOW_MMX_SUPPORTED\t; inverse DCT with 3DNow!/MMX\n"); +#else + printf("%%undef JIDCT_FLT_3DNOW_MMX_SUPPORTED\t; inverse DCT with 3DNow!/MMX\n"); +#endif +#ifdef JIDCT_FLT_SSE_MMX_SUPPORTED + printf("%%define JIDCT_FLT_SSE_MMX_SUPPORTED\t; inverse DCT with SSE/MMX\n"); +#else + printf("%%undef JIDCT_FLT_SSE_MMX_SUPPORTED\t; inverse DCT with SSE/MMX\n"); +#endif +#ifdef JIDCT_FLT_SSE_SSE2_SUPPORTED + printf("%%define 
JIDCT_FLT_SSE_SSE2_SUPPORTED\t; inverse DCT with SSE/SSE2\n"); +#else + printf("%%undef JIDCT_FLT_SSE_SSE2_SUPPORTED\t; inverse DCT with SSE/SSE2\n"); +#endif + printf("\n"); +} + + +void +print_jpeglib_h_macro (void) +{ + printf("\n"); + printf("; ---- macros from jpeglib.h ----------------------------------------------\n"); + printf("\n"); + + printf("; Version ID for the JPEG library.\n"); + printf("; Might be useful for tests like \"#if JPEG_LIB_VERSION >= 60\".\n"); + printf("\n"); + printf("%%define JPEG_LIB_VERSION %d\n", JPEG_LIB_VERSION); + printf("\n"); + printf("; SIMD Ext: Version ID for the SIMD extension.\n"); + printf("\n"); + printf("%%define JPEG_SIMDEXT_VERSION %d\n", JPEG_SIMDEXT_VERSION); + printf("%%define JPEG_SIMDEXT_VER_STR \"%s\"\n", JPEG_SIMDEXT_VER_STR); /* the string value is wrapped in escaped quotes so the emitted define carries a quoted string */ + printf("\n"); +} /* end of print_jpeglib_h_macro: it wrote the jpeglib.h section to stdout, i.e. the define lines for JPEG_LIB_VERSION, JPEG_SIMDEXT_VERSION and JPEG_SIMDEXT_VER_STR */ + + +int +main (void) /* Generator entry point: prints the text of jsimdcfg.inc on stdout (banner, include guard, then four generated sections) and terminates with status 0. */ +{ + printf(";\n; jsimdcfg.inc --- generated by makecfg.c"); /* banner line is left unterminated so the optional build stamp can be appended to it */ +#ifdef __DATE__ +#ifdef __TIME__ /* the stamp is printed only when both __DATE__ and __TIME__ are defined */ + printf(" (%s, %s)", __DATE__, __TIME__); +#endif +#endif + printf("\n;\n\n"); + printf("%%define JSIMDCFG_INCLUDED\t; so that jsimdcfg.inc doesn't do it again\n\n"); /* guard symbol is emitted before any section */ + + print_structure_offset(); /* the four sections are always emitted in this fixed order */ + print_jconfig_h_macro(); + print_jmorecfg_h_macro(); + print_jpeglib_h_macro(); + + exit(0); /* normal termination; the return on the next line is not reached */ + return 0; /* suppress no-return-value warnings */ +} diff --git a/makefile.ansi b/makefile.ansi index 8291913..fb830fc 100644 --- a/makefile.ansi +++ b/makefile.ansi @@ -1,4 +1,5 @@ # Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension # This makefile is suitable for Unix-like systems with ANSI-capable compilers. # If you have a non-ANSI compiler, makefile.unix is a better starting point. @@ -13,6 +14,13 @@ CFLAGS= -O # Generally, we recommend defining any configuration symbols in jconfig.h, # NOT via -D switches here. +# The executable name of NASM and its options: +NASM= nasm +NAFLAGS= $(NASM_OBJFMT) -I./ +# object file format specifier for NASM +# see jsimdext.inc for more details.
+NASM_OBJFMT= -felf -DELF + # Link-time cc options: LDFLAGS= @@ -24,6 +32,10 @@ LDLIBS= # to use jmemansi.o or jmemname.o if you have limited swap space. SYSDEPMEM= jmemnobs.o +# OS-dependent SIMD instruction support checker +# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc) +SYSDEPSIMDCHK= jsimdgcc.o + # miscellaneous OS-dependent stuff # linker LN= $(CC) @@ -75,17 +87,23 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) # library object files common to compression and decompression -COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) +COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \ + $(SYSDEPSIMDCHK) # compression library object files CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \ jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \ - jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \ - jfdctint.o + jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \ + jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \ + jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \ + jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o # decompression library object files DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \ jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \ - jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \ - jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o + jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \ + jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \ + jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \ + jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \ + jdmermmx.o jdmerss2.o # These objectfiles are included in libjpeg.a LIBOBJECTS= 
$(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) # object files for sample applications (excluding library files) @@ -125,7 +143,7 @@ jconfig.h: jconfig.doc clean: $(RM) *.o cjpeg djpeg jpegtran libjpeg.a rdjpgcom wrjpgcom - $(RM) core testout* + $(RM) jsimdcfg.inc core testout* test: cjpeg djpeg jpegtran $(RM) testout* @@ -143,10 +161,63 @@ test: cjpeg djpeg jpegtran cmp testorig.jpg testoutt.jpg +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg ./makecfg.c $(LDLIBS) + ./makecfg > jsimdcfg.inc + $(RM) ./makecfg + +.asm.o: + $(NASM) $(NAFLAGS) -o $@ $*.asm + +jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc 
+jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h @@ -157,33 
+228,33 @@ jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror. jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdpostct.o: jdpostct.c 
jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h -jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h diff --git a/makefile.bc5 b/makefile.bc5 new 
file mode 100644 index 0000000..f3f7df6 --- /dev/null +++ b/makefile.bc5 @@ -0,0 +1,320 @@ +# Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension + +# This makefile is suitable for Borland C++ Compiler 5.5 (win32) + +# Read installation instructions before saying "make" !! + +!ifndef srcdir +srcdir = . +!endif +.path.c = $(srcdir) +.path.h = $(srcdir) +.path.asm = $(srcdir) +.path.inc = $(srcdir);. +.path.doc = $(srcdir) + +# The name of your C compiler: +CC= bcc32 + +# You may need to adjust these cc options: +CFLAGS= -O2 -OS -Oc -d -ff -w-par -w-aus -w-ccc -w-rch -q -I$(srcdir) +# Generally, we recommend defining any configuration symbols in jconfig.h, +# NOT via -D switches here. + +# The executable name of NASM and its options: +NASM= nasmw +NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/ +# object file format specifier for NASM +# see jsimdext.inc for more details. +NASM_OBJFMT= -fobj -DOBJ32 + +# Link-time cc options: +LDFLAGS= -tWC -q + +# To link any special libraries, add the necessary -l commands here. +LDLIBS= noeh32.lib + +# Put here the object file name for the correct system-dependent memory +# manager file. For Win32, we recommend jmemnobs.c (flat memory!) +# SYSDEPMEMLIB must list the same files with "+" signs for the librarian. +SYSDEPMEM= jmemnobs.obj +SYSDEPMEMLIB= +jmemnobs.obj + +# OS-dependent SIMD instruction support checker +# jsimdw32.obj (Win32) / jsimddjg.obj (DJGPP V.2) / jsimdgcc.obj (Unix/gcc) +SYSDEPSIMDCHK= jsimdw32.obj +SYSDEPSIMDCHKLIB= +jsimdw32.obj + +# End of configurable options. 
+ + +# source files: JPEG library proper +LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \ + jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \ + jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \ + jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \ + jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \ + jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \ + jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \ + jquant2.c jutils.c jmemmgr.c +# memmgr back ends: compile only one of these into a working library +SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c +# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom +APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \ + rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \ + rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c +SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES) +# files included by source files +INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ + jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h +# documentation, test, and support files +DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \ + wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \ + coderules.doc filelist.doc change.log +MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \ + makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \ + makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \ + maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \ + makvms.opt +CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \ + jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \ + jconfig.vms +CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh +OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm 
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ + testimgp.jpg +DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ + $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) +# library object files common to compression and decompression +COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM) \ + jsimdcpu.obj $(SYSDEPSIMDCHK) +# compression library object files +CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \ + jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \ + jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \ + jcdctmgr.obj jccolmmx.obj jccolss2.obj jcsammmx.obj jcsamss2.obj \ + jcqntint.obj jcqntflt.obj jcqntmmx.obj jcqnt3dn.obj jcqnts2i.obj \ + jcqntsse.obj jcqnts2f.obj jfdctint.obj jfdctfst.obj jfdctflt.obj \ + jfmmxint.obj jfmmxfst.obj jf3dnflt.obj jfss2int.obj jfss2fst.obj \ + jfsseflt.obj +# decompression library object files +DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \ + jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \ + jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jdsample.obj \ + jdcolor.obj jquant1.obj jquant2.obj jdmerge.obj jidctint.obj \ + jidctfst.obj jidctred.obj jidctflt.obj jimmxint.obj jimmxfst.obj \ + jimmxred.obj ji3dnflt.obj jiss2int.obj jiss2fst.obj jiss2red.obj \ + jisseflt.obj jiss2flt.obj jdsammmx.obj jdsamss2.obj jdcolmmx.obj \ + jdcolss2.obj jdmermmx.obj jdmerss2.obj +# These objectfiles are included in libjpeg.lib +LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) +# object files for sample applications (excluding library files) +COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj \ + rdswitch.obj cdjpeg.obj +DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \ + rdcolmap.obj cdjpeg.obj +TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj + + +all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe + 
+libjpeg.lib: $(LIBOBJECTS) + - del libjpeg.lib + tlib libjpeg.lib /E /C @&&| ++jcapimin.obj +jcapistd.obj +jctrans.obj +jcparam.obj +jdatadst.obj & ++jcinit.obj +jcmaster.obj +jcmarker.obj +jcmainct.obj +jcprepct.obj & ++jccoefct.obj +jccolor.obj +jcsample.obj +jchuff.obj +jcphuff.obj & ++jcdctmgr.obj +jccolmmx.obj +jccolss2.obj +jcsammmx.obj +jcsamss2.obj & ++jcqntint.obj +jcqntflt.obj +jcqntmmx.obj +jcqnt3dn.obj +jcqnts2i.obj & ++jcqntsse.obj +jcqnts2f.obj +jfdctint.obj +jfdctfst.obj +jfdctflt.obj & ++jfmmxint.obj +jfmmxfst.obj +jf3dnflt.obj +jfss2int.obj +jfss2fst.obj & ++jfsseflt.obj +jdapimin.obj +jdapistd.obj +jdtrans.obj +jdatasrc.obj & ++jdmaster.obj +jdinput.obj +jdmarker.obj +jdhuff.obj +jdphuff.obj & ++jdmainct.obj +jdcoefct.obj +jdpostct.obj +jddctmgr.obj +jdsample.obj & ++jdcolor.obj +jquant1.obj +jquant2.obj +jdmerge.obj +jidctint.obj & ++jidctfst.obj +jidctred.obj +jidctflt.obj +jimmxint.obj +jimmxfst.obj & ++jimmxred.obj +ji3dnflt.obj +jiss2int.obj +jiss2fst.obj +jiss2red.obj & ++jisseflt.obj +jiss2flt.obj +jdsammmx.obj +jdsamss2.obj +jdcolmmx.obj & ++jdcolss2.obj +jdmermmx.obj +jdmerss2.obj +jcomapi.obj +jutils.obj & ++jerror.obj +jmemmgr.obj $(SYSDEPMEMLIB) +jsimdcpu.obj $(SYSDEPSIMDCHKLIB) +| + +cjpeg.exe: $(COBJECTS) libjpeg.lib + $(CC) $(LDFLAGS) -ecjpeg.exe $(COBJECTS) libjpeg.lib $(LDLIBS) + +djpeg.exe: $(DOBJECTS) libjpeg.lib + $(CC) $(LDFLAGS) -edjpeg.exe $(DOBJECTS) libjpeg.lib $(LDLIBS) + +jpegtran.exe: $(TROBJECTS) libjpeg.lib + $(CC) $(LDFLAGS) -ejpegtran.exe $(TROBJECTS) libjpeg.lib $(LDLIBS) + +rdjpgcom.exe: rdjpgcom.obj + $(CC) $(LDFLAGS) -erdjpgcom.exe rdjpgcom.obj $(LDLIBS) + +wrjpgcom.exe: wrjpgcom.obj + $(CC) $(LDFLAGS) -ewrjpgcom.exe wrjpgcom.obj $(LDLIBS) + +# This "{}" syntax allows Borland Make to "batch" source files. +# In this way, each run of the compiler can build many modules. +.c.obj: + $(CC) $(CFLAGS) -c{ $<} + +jconfig.h: jconfig.doc + echo You must prepare a system-dependent jconfig.h file. 
+ echo Please read the installation directions in install.doc. + exit 1 + +clean: + - del *.obj + - del *.tds + - del cjpeg.exe + - del djpeg.exe + - del jpegtran.exe + - del rdjpgcom.exe + - del wrjpgcom.exe + - del jsimdcfg.inc + - del libjpeg.lib + - del testout*.* + +test: cjpeg.exe djpeg.exe jpegtran.exe + - del testout*.* + djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg + djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg + cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm + djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg + cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm + jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg + fc /b $(srcdir)\testimg.ppm testout.ppm + fc /b $(srcdir)\testimg.bmp testout.bmp + fc /b $(srcdir)\testimg.jpg testout.jpg + fc /b $(srcdir)\testimg.ppm testoutp.ppm + fc /b $(srcdir)\testimgp.jpg testoutp.jpg + fc /b $(srcdir)\testorig.jpg testoutt.jpg + + +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(srcdir)\makecfg.c + $(CC) $(LDFLAGS) -emakecfg.exe makecfg.obj $(LDLIBS) + .\makecfg.exe > jsimdcfg.inc + - del makecfg.tds + - del makecfg.obj + - del makecfg.exe + +.asm.obj: + $(NASM) $(NAFLAGS) -o $@ $< + +jsimdcpu.obj: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.obj: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.obj: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.obj: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.obj: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.obj: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.obj: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.obj: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.obj: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.obj: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.obj: jdmerss2.asm jsimdcfg.inc jsimdext.inc 
jcolsamp.inc +jdsammmx.obj: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.obj: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.obj: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.obj: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.obj: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.obj: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.obj: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.obj: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.obj: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.obj: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.obj: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.obj: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.obj: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.obj: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.obj: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.obj: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.obj: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.obj: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.obj: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.obj: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.obj: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.obj: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.obj: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.obj: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.obj: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.obj: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.obj: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.obj: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.obj: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.obj: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.obj: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.obj: 
jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + +jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmaster.obj: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h 
+jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h +# jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h 
jerror.h +jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h +rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h +wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h +cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h +rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h 
jerror.h cderror.h +wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h diff --git a/makefile.cfg b/makefile.cfg index f25e42e..c18b7e6 100644 --- a/makefile.cfg +++ b/makefile.cfg @@ -1,4 +1,5 @@ # Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension # makefile.cfg is edited by configure to produce a custom Makefile. @@ -16,8 +17,9 @@ libdir = $(exec_prefix)/lib includedir = $(prefix)/include binprefix = manprefix = -manext = 1 -mandir = $(prefix)/man/man$(manext) +manext = .1 +mandir = $(prefix)/man +man1dir = $(mandir)/man1 # The name of your C compiler: CC= @CC@ @@ -29,6 +31,10 @@ CFLAGS= @CFLAGS@ @CPPFLAGS@ @INCLUDEFLAGS@ # However, any special defines for ansi2knr.c may be included here: ANSI2KNRFLAGS= @ANSI2KNRFLAGS@ +# The executable name of NASM and its options: +NASM= @NASM@ +NAFLAGS= @NAFLAGS@ @INCLUDEFLAGS@ + # Link-time cc options: LDFLAGS= @LDFLAGS@ @@ -37,6 +43,7 @@ LDLIBS= @LIBS@ # If using GNU libtool, LIBTOOL references it; if not, LIBTOOL is empty. LIBTOOL = @LIBTOOL@ +top_builddir = . # $(O) expands to "lo" if using libtool, plain "o" if not. # Similarly, $(A) expands to "la" or "a". O = @O@ @@ -51,8 +58,12 @@ JPEG_LIB_VERSION = @JPEG_LIB_VERSION@ # to use jmemansi.o or jmemname.o if you have limited swap space. SYSDEPMEM= @MEMORYMGR@ +# OS-dependent SIMD instruction support checker +# jsimdw32.$(O) (Win32) / jsimddjg.$(O) (DJGPP V.2) / jsimdgcc.$(O) (Unix/gcc) +SYSDEPSIMDCHK= @SIMDCHECKER@ + # miscellaneous OS-dependent stuff -SHELL= /bin/sh +SHELL= @SHELL@ # linker LN= @LN@ # file deletion command @@ -68,6 +79,11 @@ INSTALL= @INSTALL@ INSTALL_PROGRAM= @INSTALL_PROGRAM@ INSTALL_LIB= @INSTALL_LIB@ INSTALL_DATA= @INSTALL_DATA@ +# uninstallation program +UNINSTALL= @UNINSTALL@ +# executable suffix. 
under cygwin, +# 'rm' doesn't know that executables have .exe suffix. +EXE = @EXEEXT@ # End of configurable options. @@ -110,19 +126,26 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) # library object files common to compression and decompression -COMOBJECTS= jcomapi.$(O) jutils.$(O) jerror.$(O) jmemmgr.$(O) $(SYSDEPMEM) +COMOBJECTS= jcomapi.$(O) jutils.$(O) jerror.$(O) jmemmgr.$(O) $(SYSDEPMEM) \ + jsimdcpu.$(O) $(SYSDEPSIMDCHK) # compression library object files CLIBOBJECTS= jcapimin.$(O) jcapistd.$(O) jctrans.$(O) jcparam.$(O) \ jdatadst.$(O) jcinit.$(O) jcmaster.$(O) jcmarker.$(O) jcmainct.$(O) \ jcprepct.$(O) jccoefct.$(O) jccolor.$(O) jcsample.$(O) jchuff.$(O) \ - jcphuff.$(O) jcdctmgr.$(O) jfdctfst.$(O) jfdctflt.$(O) \ - jfdctint.$(O) + jcphuff.$(O) jcdctmgr.$(O) jccolmmx.$(O) jccolss2.$(O) jcsammmx.$(O) \ + jcsamss2.$(O) jcqntint.$(O) jcqntflt.$(O) jcqntmmx.$(O) jcqnt3dn.$(O) \ + jcqnts2i.$(O) jcqntsse.$(O) jcqnts2f.$(O) jfdctint.$(O) jfdctfst.$(O) \ + jfdctflt.$(O) jfmmxint.$(O) jfmmxfst.$(O) jf3dnflt.$(O) jfss2int.$(O) \ + jfss2fst.$(O) jfsseflt.$(O) # decompression library object files DLIBOBJECTS= jdapimin.$(O) jdapistd.$(O) jdtrans.$(O) jdatasrc.$(O) \ jdmaster.$(O) jdinput.$(O) jdmarker.$(O) jdhuff.$(O) jdphuff.$(O) \ - jdmainct.$(O) jdcoefct.$(O) jdpostct.$(O) jddctmgr.$(O) \ - jidctfst.$(O) jidctflt.$(O) jidctint.$(O) jidctred.$(O) \ - jdsample.$(O) jdcolor.$(O) jquant1.$(O) jquant2.$(O) jdmerge.$(O) + jdmainct.$(O) jdcoefct.$(O) jdpostct.$(O) jddctmgr.$(O) jdsample.$(O) \ + jdcolor.$(O) jquant1.$(O) jquant2.$(O) jdmerge.$(O) jidctint.$(O) \ + jidctfst.$(O) jidctred.$(O) jidctflt.$(O) jimmxint.$(O) jimmxfst.$(O) \ + jimmxred.$(O) ji3dnflt.$(O) jiss2int.$(O) jiss2fst.$(O) jiss2red.$(O) \ + jisseflt.$(O) jiss2flt.$(O) jdsammmx.$(O) jdsamss2.$(O) jdcolmmx.$(O) \ + jdcolss2.$(O) jdmermmx.$(O) jdmerss2.$(O) # 
These objectfiles are included in libjpeg.a LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) # object files for sample applications (excluding library files) @@ -136,12 +159,19 @@ TROBJECTS= jpegtran.$(O) rdswitch.$(O) cdjpeg.$(O) transupp.$(O) all: @A2K_DEPS@ libjpeg.$(A) cjpeg djpeg jpegtran rdjpgcom wrjpgcom # Special compilation rules to support ansi2knr and libtool. -.SUFFIXES: .lo .la +.SUFFIXES: .lo .la .asm + +.asm.o: + $(SHELL) $(srcdir)/nasm_lt.sh $(NASM) $(NAFLAGS) $(srcdir)/$*.asm # How to compile with libtool. @COM_LT@.c.lo: @COM_LT@ $(LIBTOOL) --mode=compile $(CC) $(CFLAGS) -c $(srcdir)/$*.c +@COM_LT@.asm.lo: +@COM_LT@ $(LIBTOOL) --mode=compile @TAGCC@ $(SHELL) $(srcdir)/nasm_lt.sh \ +@COM_LT@ $(NASM) $(NAFLAGS) $(srcdir)/$*.asm + # How to use ansi2knr, when not using libtool. @COM_A2K@.c.o: @COM_A2K@ ./ansi2knr $(srcdir)/$*.c knr/$*.c @@ -169,7 +199,7 @@ libjpeg.a: @A2K_DEPS@ $(LIBOBJECTS) # with libtool: libjpeg.la: @A2K_DEPS@ $(LIBOBJECTS) $(LIBTOOL) --mode=link $(CC) -o libjpeg.la $(LIBOBJECTS) \ - -rpath $(libdir) -version-info $(JPEG_LIB_VERSION) + -no-undefined -rpath $(libdir) -version-info $(JPEG_LIB_VERSION) # sample programs: @@ -191,34 +221,62 @@ wrjpgcom: wrjpgcom.$(O) # Installation rules: install: cjpeg djpeg jpegtran rdjpgcom wrjpgcom @FORCE_INSTALL_LIB@ + -@if [ ! -d $(bindir) ]; then mkdir -p $(bindir); fi + -@if [ ! 
-d $(man1dir) ]; then mkdir -p $(man1dir); fi $(INSTALL_PROGRAM) cjpeg $(bindir)/$(binprefix)cjpeg $(INSTALL_PROGRAM) djpeg $(bindir)/$(binprefix)djpeg $(INSTALL_PROGRAM) jpegtran $(bindir)/$(binprefix)jpegtran $(INSTALL_PROGRAM) rdjpgcom $(bindir)/$(binprefix)rdjpgcom $(INSTALL_PROGRAM) wrjpgcom $(bindir)/$(binprefix)wrjpgcom - $(INSTALL_DATA) $(srcdir)/cjpeg.1 $(mandir)/$(manprefix)cjpeg.$(manext) - $(INSTALL_DATA) $(srcdir)/djpeg.1 $(mandir)/$(manprefix)djpeg.$(manext) - $(INSTALL_DATA) $(srcdir)/jpegtran.1 $(mandir)/$(manprefix)jpegtran.$(manext) - $(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(mandir)/$(manprefix)rdjpgcom.$(manext) - $(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(mandir)/$(manprefix)wrjpgcom.$(manext) + $(INSTALL_DATA) $(srcdir)/cjpeg.1 $(man1dir)/$(manprefix)cjpeg$(manext) + $(INSTALL_DATA) $(srcdir)/djpeg.1 $(man1dir)/$(manprefix)djpeg$(manext) + $(INSTALL_DATA) $(srcdir)/jpegtran.1 $(man1dir)/$(manprefix)jpegtran$(manext) + $(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(man1dir)/$(manprefix)rdjpgcom$(manext) + $(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(man1dir)/$(manprefix)wrjpgcom$(manext) install-lib: libjpeg.$(A) install-headers + -@if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi $(INSTALL_LIB) libjpeg.$(A) $(libdir)/$(binprefix)libjpeg.$(A) install-headers: jconfig.h + -@if [ ! 
-d $(includedir) ]; then mkdir -p $(includedir); fi $(INSTALL_DATA) jconfig.h $(includedir)/jconfig.h $(INSTALL_DATA) $(srcdir)/jpeglib.h $(includedir)/jpeglib.h $(INSTALL_DATA) $(srcdir)/jmorecfg.h $(includedir)/jmorecfg.h $(INSTALL_DATA) $(srcdir)/jerror.h $(includedir)/jerror.h +# Uninstallation rules: + +uninstall: @UNINSTALL_LIB@ + $(UNINSTALL) $(bindir)/$(binprefix)cjpeg$(EXE) + $(UNINSTALL) $(bindir)/$(binprefix)djpeg$(EXE) + $(UNINSTALL) $(bindir)/$(binprefix)jpegtran$(EXE) + $(UNINSTALL) $(bindir)/$(binprefix)rdjpgcom$(EXE) + $(UNINSTALL) $(bindir)/$(binprefix)wrjpgcom$(EXE) + $(UNINSTALL) $(man1dir)/$(manprefix)cjpeg$(manext) + $(UNINSTALL) $(man1dir)/$(manprefix)djpeg$(manext) + $(UNINSTALL) $(man1dir)/$(manprefix)jpegtran$(manext) + $(UNINSTALL) $(man1dir)/$(manprefix)rdjpgcom$(manext) + $(UNINSTALL) $(man1dir)/$(manprefix)wrjpgcom$(manext) + +uninstall-lib: uninstall-headers + $(UNINSTALL) $(libdir)/$(binprefix)libjpeg.$(A) + +uninstall-headers: + $(UNINSTALL) $(includedir)/jconfig.h + $(UNINSTALL) $(includedir)/jpeglib.h + $(UNINSTALL) $(includedir)/jmorecfg.h + $(UNINSTALL) $(includedir)/jerror.h + clean: - $(RM) *.o *.lo libjpeg.a libjpeg.la - $(RM) cjpeg djpeg jpegtran rdjpgcom wrjpgcom - $(RM) ansi2knr core testout* config.log config.status + $(RM) jsimdcfg.inc *.o *.lo libjpeg.a libjpeg.la +# under cygwin, libtool will create wrapper scripts without suffix. 
+ $(RM) cjpeg djpeg jpegtran cjpeg$(EXE) djpeg$(EXE) jpegtran$(EXE) + $(RM) rdjpgcom$(EXE) wrjpgcom$(EXE) ansi2knr$(EXE) core testout* $(RM) -r knr .libs _libs distclean: clean - $(RM) Makefile jconfig.h libtool config.cache + $(RM) Makefile jconfig.h libtool config.cache config.status config.log test: cjpeg djpeg jpegtran $(RM) testout* @@ -248,10 +306,60 @@ jconfig.h: jconfig.doc .PHONY: all install install-lib install-headers clean distclean test check +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg $(srcdir)/makecfg.c $(LDLIBS) + ./makecfg > jsimdcfg.inc + $(RM) makecfg$(EXE) + +jsimdcpu.$(O): jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.$(O): jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.$(O): jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.$(O): jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.$(O): jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.$(O): jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.$(O): jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.$(O): jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.$(O): jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.$(O): jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.$(O): jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.$(O): jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.$(O): jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.$(O): jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.$(O): jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.$(O): jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.$(O): jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.$(O): jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.$(O): jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.$(O): jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.$(O): jfdctint.asm 
jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.$(O): jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.$(O): jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.$(O): jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.$(O): jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.$(O): jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.$(O): jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.$(O): jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.$(O): jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.$(O): jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.$(O): jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.$(O): jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.$(O): jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.$(O): jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.$(O): jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.$(O): jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.$(O): ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.$(O): jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.$(O): jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.$(O): jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.$(O): jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.$(O): jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.$(O): jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + jcapimin.$(O): jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcapistd.$(O): jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jccoefct.$(O): jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jccolor.$(O): jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.$(O): jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jcdctmgr.$(O): jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h 
jchuff.$(O): jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcinit.$(O): jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h @@ -262,33 +370,33 @@ jcomapi.$(O): jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerr jcparam.$(O): jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcphuff.$(O): jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcprepct.$(O): jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jcsample.$(O): jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.$(O): jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jctrans.$(O): jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapimin.$(O): jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapistd.$(O): jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdatadst.$(O): jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdatasrc.$(O): jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdcoefct.$(O): jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdcolor.$(O): jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.$(O): jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jddctmgr.$(O): jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jdhuff.$(O): jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdinput.$(O): jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmainct.$(O): jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmarker.$(O): jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmaster.$(O): jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdmerge.$(O): jdmerge.c jinclude.h jconfig.h 
jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.$(O): jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdphuff.$(O): jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdpostct.$(O): jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdsample.$(O): jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.$(O): jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdtrans.$(O): jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jerror.$(O): jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h -jfdctflt.$(O): jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctfst.$(O): jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctint.$(O): jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctflt.$(O): jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctfst.$(O): jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctint.$(O): jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctred.$(O): jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctflt.$(O): jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.$(O): jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.$(O): jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.$(O): jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.$(O): jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.$(O): jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.$(O): jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h 
jpegint.h jerror.h jdct.h jquant1.$(O): jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jquant2.$(O): jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jutils.$(O): jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h diff --git a/makefile.dj b/makefile.dj index f766d25..2186468 100644 --- a/makefile.dj +++ b/makefile.dj @@ -1,18 +1,34 @@ # Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension # This makefile is for DJGPP (Delorie's GNU C port on MS-DOS), v2.0 or later. # Thanks to Frank J. Donahoe for this version. # Read installation instructions before saying "make" !! +srcdir = . +VPATH = $(srcdir) + # The name of your C compiler: CC= gcc # You may need to adjust these cc options: -CFLAGS= -O2 -Wall -I. +# For gcc 3.4.x +CFLAGS= -O2 -mtune=pentium2 -march=i386 -fomit-frame-pointer -fweb \ + -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir) +# For gcc 3.3.x +#CFLAGS= -O2 -mcpu=pentium2 -march=i386 -fomit-frame-pointer \ +# -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir) # Generally, we recommend defining any configuration symbols in jconfig.h, # NOT via -D switches here. +# The executable name of NASM and its options: +NASM= nasm +NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/ +# object file format specifier for NASM +# see jsimdext.inc for more details. +NASM_OBJFMT= -fcoff -DDJGPP + # Link-time cc options: LDFLAGS= -s @@ -24,6 +40,10 @@ LDLIBS= # use jmemname.o if you want to use named temp files instead of swap space. 
SYSDEPMEM= jmemnobs.o +# OS-dependent SIMD instruction support checker +# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc) +SYSDEPSIMDCHK= jsimddjg.o + # miscellaneous OS-dependent stuff # linker LN= $(CC) @@ -75,17 +95,23 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) # library object files common to compression and decompression -COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) +COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \ + $(SYSDEPSIMDCHK) # compression library object files CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \ jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \ - jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \ - jfdctint.o + jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \ + jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \ + jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \ + jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o # decompression library object files DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \ jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \ - jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \ - jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o + jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \ + jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \ + jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \ + jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \ + jdmermmx.o jdmerss2.o # These objectfiles are included in libjpeg.a LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) # object files for sample applications (excluding library files) @@ -130,29 +156,83 @@ clean: $(RM) jpegtran.exe 
$(RM) rdjpgcom.exe $(RM) wrjpgcom.exe + $(RM) jsimdcfg.inc $(RM) libjpeg.a $(RM) testout*.* test: cjpeg.exe djpeg.exe jpegtran.exe $(RM) testout*.* - ./djpeg -dct int -ppm -outfile testout.ppm testorig.jpg - ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp testorig.jpg - ./cjpeg -dct int -outfile testout.jpg testimg.ppm - ./djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg - ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm - ./jpegtran -outfile testoutt.jpg testprog.jpg - fc /b testimg.ppm testout.ppm - fc /b testimg.bmp testout.bmp - fc /b testimg.jpg testout.jpg - fc /b testimg.ppm testoutp.ppm - fc /b testimgp.jpg testoutp.jpg - fc /b testorig.jpg testoutt.jpg + ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg + ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg + ./cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm + ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg + ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm + ./jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg + fc /b $(srcdir)\testimg.ppm testout.ppm + fc /b $(srcdir)\testimg.bmp testout.bmp + fc /b $(srcdir)\testimg.jpg testout.jpg + fc /b $(srcdir)\testimg.ppm testoutp.ppm + fc /b $(srcdir)\testimgp.jpg testoutp.jpg + fc /b $(srcdir)\testorig.jpg testoutt.jpg + + +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg.exe $(srcdir)/makecfg.c $(LDLIBS) + .\makecfg.exe > jsimdcfg.inc + $(RM) makecfg.exe + +%.o : %.asm + $(NASM) $(NAFLAGS) -o $@ $< + +jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc 
+jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.o: 
jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h @@ -163,33 +243,33 @@ jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror. 
jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdsample.o: jdsample.c jinclude.h 
jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h -jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h diff --git a/makefile.linux b/makefile.linux new file mode 100644 index 0000000..54e1d65 --- /dev/null +++ b/makefile.linux @@ -0,0 +1,449 @@ +# 
Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension + +# This makefile is for Linux ELF with gcc + +# Read installation instructions before saying "make" !! + +# For compiling with source and object files in different directories. +srcdir = . +VPATH = $(srcdir) + +# Where to install the programs and man pages. +prefix = /usr/local +exec_prefix = ${prefix} +bindir = $(exec_prefix)/bin +libdir = $(exec_prefix)/lib +includedir = $(prefix)/include +binprefix = +manprefix = +manext = 1 +mandir = $(prefix)/man/man$(manext) + +LNNAME = libjpeg.so +SONAME = libjpeg.so.62 +LIBNAME = libjpeg.so.62.1.0 + +# The name of your C compiler: +CC= gcc + +# You may need to adjust these cc options: +CFLAGS= -O2 -mcpu=i686 -march=i386 -I$(srcdir) +# Generally, we recommend defining any configuration symbols in jconfig.h, +# NOT via -D switches here. + +# The executable name of NASM and its options: +NASM= nasm +NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/ +# object file format specifier for NASM +# see jsimdext.inc for more details. +NASM_OBJFMT= -felf -DELF + +# Link-time cc options: +LDFLAGS= + +# To link any special libraries, add the necessary -l commands here. +LDLIBS= + +# Put here the object file name for the correct system-dependent memory +# manager file. For Unix this is usually jmemnobs.o, but you may want +# to use jmemansi.o or jmemname.o if you have limited swap space. +SYSDEPMEM= jmemnobs.o + +# OS-dependent SIMD instruction support checker +# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc) +SYSDEPSIMDCHK= jsimdgcc.o + +# miscellaneous OS-dependent stuff +# linker +LN= $(CC) +# file deletion command +RM= rm -f +# library (.a) file creation command +AR= ar rc +# second step in .a creation (use "touch" if not needed) +AR2= ranlib +# installation program +INSTALL= install -c +INSTALL_PROGRAM= ${INSTALL} -s +INSTALL_SHARED = ${INSTALL} +INSTALL_LIB= ${INSTALL} -m 644 +INSTALL_DATA= ${INSTALL} -m 644 + +# End of configurable options. 
+ + +# source files: JPEG library proper +LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \ + jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \ + jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \ + jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \ + jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \ + jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \ + jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \ + jquant2.c jutils.c jmemmgr.c +# memmgr back ends: compile only one of these into a working library +SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c +# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom +APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \ + rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \ + rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c +SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES) +# files included by source files +INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ + jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h +# documentation, test, and support files +DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \ + wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \ + coderules.doc filelist.doc change.log +MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \ + makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \ + makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \ + maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \ + makvms.opt +CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \ + jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \ + jconfig.vms +CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh +OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm 
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ + testimgp.jpg +DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ + $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) +# library object files common to compression and decompression +COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \ + $(SYSDEPSIMDCHK) +# compression library object files +CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \ + jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \ + jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \ + jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \ + jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \ + jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o +# decompression library object files +DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \ + jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \ + jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \ + jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \ + jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \ + jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \ + jdmermmx.o jdmerss2.o +# These objectfiles are included in libjpeg.a +LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) +# These objectfiles are included in libjpeg.so +DLLOBJECTS= $(LIBOBJECTS:.o=.pic.o) +# object files for sample applications (excluding library files) +COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \ + cdjpeg.o +DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \ + cdjpeg.o +TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o + + +all: static shared app +app: cjpeg djpeg jpegtran rdjpgcom wrjpgcom +app-static: cjpeg-static djpeg-static jpegtran-static +shared: $(LIBNAME) +static: libjpeg.a + +libjpeg.a: $(LIBOBJECTS) + $(RM) libjpeg.a + 
$(AR) libjpeg.a $(LIBOBJECTS) + $(AR2) libjpeg.a + +$(LIBNAME): $(DLLOBJECTS) + $(CC) -shared -Wl,-soname,$(SONAME) -o $(LIBNAME) $(DLLOBJECTS) + +$(SONAME): $(LIBNAME) + ln -sf $(LIBNAME) $(SONAME) + +$(LNNAME): $(SONAME) + ln -sf $(LIBNAME) $(LNNAME) + +cjpeg-static: $(COBJECTS) libjpeg.a + $(LN) $(LDFLAGS) -o cjpeg-static $(COBJECTS) libjpeg.a $(LDLIBS) + +djpeg-static: $(DOBJECTS) libjpeg.a + $(LN) $(LDFLAGS) -o djpeg-static $(DOBJECTS) libjpeg.a $(LDLIBS) + +jpegtran-static: $(TROBJECTS) libjpeg.a + $(LN) $(LDFLAGS) -o jpegtran-static $(TROBJECTS) libjpeg.a $(LDLIBS) + +cjpeg-shared: $(COBJECTS) $(LNNAME) + $(LN) $(LDFLAGS) -o cjpeg-shared $(COBJECTS) $(LNNAME) $(LDLIBS) + +djpeg-shared: $(DOBJECTS) $(LNNAME) + $(LN) $(LDFLAGS) -o djpeg-shared $(DOBJECTS) $(LNNAME) $(LDLIBS) + +jpegtran-shared: $(TROBJECTS) $(LNNAME) + $(LN) $(LDFLAGS) -o jpegtran-shared $(TROBJECTS) $(LNNAME) $(LDLIBS) + +rdjpgcom: rdjpgcom.o + $(LN) $(LDFLAGS) -o rdjpgcom rdjpgcom.o $(LDLIBS) + +wrjpgcom: wrjpgcom.o + $(LN) $(LDFLAGS) -o wrjpgcom wrjpgcom.o $(LDLIBS) + +cjpeg: cjpeg-shared + echo '#!/bin/sh' > cjpeg + echo export LD_LIBRARY_PATH=`pwd`:'$$LD_LIBRARY_PATH' >> cjpeg + echo exec `pwd`/cjpeg-shared '"$$@"' >> cjpeg + chmod +x cjpeg + +djpeg: djpeg-shared + echo '#!/bin/sh' > djpeg + echo export LD_LIBRARY_PATH=`pwd`:'$$LD_LIBRARY_PATH' >> djpeg + echo exec `pwd`/djpeg-shared '"$$@"' >> djpeg + chmod +x djpeg + +jpegtran: jpegtran-shared + echo '#!/bin/sh' > jpegtran + echo export LD_LIBRARY_PATH=`pwd`:'$$LD_LIBRARY_PATH' >> jpegtran + echo exec `pwd`/jpegtran-shared '"$$@"' >> jpegtran + chmod +x jpegtran + +jconfig.h: jconfig.doc + echo You must prepare a system-dependent jconfig.h file. + echo Please read the installation directions in install.doc. 
+ exit 1 + +clean: + $(RM) *.o libjpeg.a $(LIBNAME) $(SONAME) $(LNNAME) + $(RM) cjpeg djpeg jpegtran rdjpgcom wrjpgcom + $(RM) cjpeg-shared djpeg-shared jpegtran-shared + $(RM) cjpeg-static djpeg-static jpegtran-static + $(RM) core testout* + $(RM) jsimdcfg.inc + +test: cjpeg djpeg jpegtran + $(RM) testout* + ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)/testorig.jpg + ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)/testorig.jpg + ./cjpeg -dct int -outfile testout.jpg $(srcdir)/testimg.ppm + ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)/testprog.jpg + ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)/testimg.ppm + ./jpegtran -outfile testoutt.jpg $(srcdir)/testprog.jpg + cmp $(srcdir)/testimg.ppm testout.ppm + cmp $(srcdir)/testimg.bmp testout.bmp + cmp $(srcdir)/testimg.jpg testout.jpg + cmp $(srcdir)/testimg.ppm testoutp.ppm + cmp $(srcdir)/testimgp.jpg testoutp.jpg + cmp $(srcdir)/testorig.jpg testoutt.jpg + +test-static: cjpeg-static djpeg-static jpegtran-static + $(RM) testout* + ./djpeg-static -dct int -ppm -outfile testout.ppm $(srcdir)/testorig.jpg + ./djpeg-static -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)/testorig.jpg + ./cjpeg-static -dct int -outfile testout.jpg $(srcdir)/testimg.ppm + ./djpeg-static -dct int -ppm -outfile testoutp.ppm $(srcdir)/testprog.jpg + ./cjpeg-static -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)/testimg.ppm + ./jpegtran-static -outfile testoutt.jpg $(srcdir)/testprog.jpg + cmp $(srcdir)/testimg.ppm testout.ppm + cmp $(srcdir)/testimg.bmp testout.bmp + cmp $(srcdir)/testimg.jpg testout.jpg + cmp $(srcdir)/testimg.ppm testoutp.ppm + cmp $(srcdir)/testimgp.jpg testoutp.jpg + cmp $(srcdir)/testorig.jpg testoutt.jpg + + +install: install-lib install-app install-man + +install-app-static: cjpeg-static djpeg-static jpegtran-static + -@if [ ! 
-d $(bindir) ]; then mkdir -p $(bindir); fi + $(INSTALL_PROGRAM) cjpeg-static $(bindir)/$(binprefix)cjpeg-static + $(INSTALL_PROGRAM) djpeg-static $(bindir)/$(binprefix)djpeg-static + $(INSTALL_PROGRAM) jpegtran-static $(bindir)/$(binprefix)jpegtran-static + +install-app: install-lib cjpeg-shared djpeg-shared jpegtran-shared rdjpgcom wrjpgcom + -@if [ ! -d $(bindir) ]; then mkdir -p $(bindir); fi + $(INSTALL_PROGRAM) cjpeg-shared $(bindir)/$(binprefix)cjpeg + $(INSTALL_PROGRAM) djpeg-shared $(bindir)/$(binprefix)djpeg + $(INSTALL_PROGRAM) jpegtran-shared $(bindir)/$(binprefix)jpegtran + $(INSTALL_PROGRAM) rdjpgcom $(bindir)/$(binprefix)rdjpgcom + $(INSTALL_PROGRAM) wrjpgcom $(bindir)/$(binprefix)wrjpgcom + +install-man: cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1 + -@if [ ! -d $(mandir) ]; then mkdir -p $(mandir); fi + $(INSTALL_DATA) $(srcdir)/cjpeg.1 $(mandir)/$(manprefix)cjpeg.$(manext) + $(INSTALL_DATA) $(srcdir)/djpeg.1 $(mandir)/$(manprefix)djpeg.$(manext) + $(INSTALL_DATA) $(srcdir)/jpegtran.1 $(mandir)/$(manprefix)jpegtran.$(manext) + $(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(mandir)/$(manprefix)rdjpgcom.$(manext) + $(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(mandir)/$(manprefix)wrjpgcom.$(manext) + +install-lib: install-headers libjpeg.a $(LIBNAME) + -@if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi + $(INSTALL_LIB) libjpeg.a $(libdir)/libjpeg.a + $(INSTALL_SHARED) $(LIBNAME) $(libdir)/$(LIBNAME) + (cd $(libdir) && ln -sf $(LIBNAME) $(SONAME) && ln -sf $(LIBNAME) $(LNNAME)) + +install-headers: jconfig.h jpeglib.h jmorecfg.h jerror.h + -@if [ ! 
-d $(includedir) ]; then mkdir -p $(includedir); fi + $(INSTALL_DATA) $(srcdir)/jconfig.h $(includedir)/jconfig.h + $(INSTALL_DATA) $(srcdir)/jpeglib.h $(includedir)/jpeglib.h + $(INSTALL_DATA) $(srcdir)/jmorecfg.h $(includedir)/jmorecfg.h + $(INSTALL_DATA) $(srcdir)/jerror.h $(includedir)/jerror.h + +uninstall: uninstall-lib uninstall-app uninstall-man + +uninstall-app-static: + $(RM) $(bindir)/$(binprefix)cjpeg-static + $(RM) $(bindir)/$(binprefix)djpeg-static + $(RM) $(bindir)/$(binprefix)jpegtran-static + +uninstall-app: uninstall-lib + $(RM) $(bindir)/$(binprefix)cjpeg + $(RM) $(bindir)/$(binprefix)djpeg + $(RM) $(bindir)/$(binprefix)jpegtran + $(RM) $(bindir)/$(binprefix)rdjpgcom + $(RM) $(bindir)/$(binprefix)wrjpgcom + +uninstall-man: + $(RM) $(mandir)/$(manprefix)cjpeg.$(manext) + $(RM) $(mandir)/$(manprefix)djpeg.$(manext) + $(RM) $(mandir)/$(manprefix)jpegtran.$(manext) + $(RM) $(mandir)/$(manprefix)rdjpgcom.$(manext) + $(RM) $(mandir)/$(manprefix)wrjpgcom.$(manext) + +uninstall-lib: uninstall-headers + $(RM) $(libdir)/libjpeg.a + $(RM) $(libdir)/$(LIBNAME) + $(RM) $(libdir)/$(SONAME) + $(RM) $(libdir)/$(LNNAME) + +uninstall-headers: + $(RM) $(includedir)/jconfig.h + $(RM) $(includedir)/jpeglib.h + $(RM) $(includedir)/jmorecfg.h + $(RM) $(includedir)/jerror.h + + +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg $(srcdir)/makecfg.c $(LDLIBS) + ./makecfg > jsimdcfg.inc || { $(RM) jsimdcfg.inc; exit 1; } + $(RM) ./makecfg + +.SUFFIXES: .c .asm .o .pic.o + +%.pic.o : %.c + $(CC) $(CFLAGS) -fPIC -c -o $@ $< + +%.pic.o : %.asm + $(NASM) $(NAFLAGS) -DPIC -o $@ $< + +%.o : %.asm + $(NASM) $(NAFLAGS) -o $@ $< + +jsimdcpu.o jsimdcpu.pic.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.o jsimdw32.pic.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.o jsimddjg.pic.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.o jccolmmx.pic.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.o jccolss2.pic.o: 
jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.o jcsammmx.pic.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.o jcsamss2.pic.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.o jdcolmmx.pic.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.o jdcolss2.pic.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.o jdmermmx.pic.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.o jdmerss2.pic.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.o jdsammmx.pic.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.o jdsamss2.pic.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.o jcqntint.pic.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.o jcqntflt.pic.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.o jcqntmmx.pic.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.o jcqnt3dn.pic.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.o jcqnts2i.pic.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.o jcqntsse.pic.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.o jcqnts2f.pic.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.o jfdctint.pic.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.o jfdctfst.pic.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.o jfdctflt.pic.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.o jfmmxint.pic.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.o jfmmxfst.pic.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.o jf3dnflt.pic.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.o jfss2int.pic.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.o jfss2fst.pic.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.o jfsseflt.pic.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.o jidctint.pic.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.o 
jidctfst.pic.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.o jidctred.pic.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.o jidctflt.pic.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.o jimmxint.pic.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.o jimmxfst.pic.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.o jimmxred.pic.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.o ji3dnflt.pic.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.o jiss2int.pic.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.o jiss2fst.pic.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.o jiss2red.pic.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.o jisseflt.pic.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.o jiss2flt.pic.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.o jsimdgcc.pic.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + +jcapimin.o jcapimin.pic.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcapistd.o jcapistd.pic.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccoefct.o jccoefct.pic.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.o jccolor.pic.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jcdctmgr.o jcdctmgr.pic.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jchuff.o jchuff.pic.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcinit.o jcinit.pic.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmainct.o jcmainct.pic.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmarker.o jcmarker.pic.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmaster.o jcmaster.pic.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h 
jerror.h +jcomapi.o jcomapi.pic.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcparam.o jcparam.pic.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcphuff.o jcphuff.pic.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcprepct.o jcprepct.pic.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.o jcsample.pic.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jctrans.o jctrans.pic.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapimin.o jdapimin.pic.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapistd.o jdapistd.pic.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdatadst.o jdatadst.pic.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdatasrc.o jdatasrc.pic.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdcoefct.o jdcoefct.pic.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.o jdcolor.pic.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jddctmgr.o jddctmgr.pic.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jdhuff.o jdhuff.pic.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdinput.o jdinput.pic.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmainct.o jdmainct.pic.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmarker.o jdmarker.pic.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmaster.o jdmaster.pic.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.o jdmerge.pic.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdphuff.o jdphuff.pic.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h 
jpegint.h jerror.h jdhuff.h +jdpostct.o jdpostct.pic.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.o jdsample.pic.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdtrans.o jdtrans.pic.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jerror.o jerror.pic.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h +# jfdctflt.o jfdctflt.pic.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.o jfdctfst.pic.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.o jfdctint.pic.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.o jidctflt.pic.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.o jidctfst.pic.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.o jidctint.pic.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.o jidctred.pic.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jquant1.o jquant1.pic.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jquant2.o jquant2.pic.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jutils.o jutils.pic.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jmemmgr.o jmemmgr.pic.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemansi.o jmemansi.pic.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemname.o jmemname.pic.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemnobs.o jmemnobs.pic.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemdos.o jmemdos.pic.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h 
jpegint.h jerror.h jmemsys.h +jmemmac.o jmemmac.pic.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h +rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h +wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h +cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h +rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h diff --git a/makefile.mgw b/makefile.mgw new file mode 100644 index 0000000..06f09e0 --- /dev/null +++ b/makefile.mgw @@ -0,0 +1,298 @@ +# Makefile for Independent JPEG Group's software +# Modified 
for x86 SIMD extension + +# This makefile is for MinGW. + +# Read installation instructions before saying "make" !! + +srcdir = . +VPATH = $(srcdir) + +# The name of your C compiler: +CC= gcc + +# You may need to adjust these cc options: +# For gcc 3.4.x +CFLAGS= -O2 -mtune=pentium2 -march=i386 -fomit-frame-pointer -fweb \ + -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir) +# For gcc 3.3.x +#CFLAGS= -O2 -mcpu=pentium2 -march=i386 -fomit-frame-pointer \ +# -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir) +# Generally, we recommend defining any configuration symbols in jconfig.h, +# NOT via -D switches here. + +# The executable name of NASM and its options: +NASM= nasmw +NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/ +# object file format specifier for NASM +# see jsimdext.inc for more details. +NASM_OBJFMT= -fwin32 -DWIN32 + +# Link-time cc options: +LDFLAGS= -s + +# To link any special libraries, add the necessary -l commands here. +LDLIBS= + +# Put here the object file name for the correct system-dependent memory +# manager file. +SYSDEPMEM= jmemnobs.o + +# OS-dependent SIMD instruction support checker +# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc) +SYSDEPSIMDCHK= jsimdw32.o + +# miscellaneous OS-dependent stuff +# linker +LN= $(CC) +# file deletion command +RM= del +# library (.a) file creation command +AR= ar rc +# second step in .a creation (use "touch" if not needed) +AR2= ranlib + +# End of configurable options. 
+ + +# source files: JPEG library proper +LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \ + jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \ + jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \ + jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \ + jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \ + jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \ + jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \ + jquant2.c jutils.c jmemmgr.c +# memmgr back ends: compile only one of these into a working library +SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c +# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom +APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \ + rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \ + rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c +SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES) +# files included by source files +INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ + jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h +# documentation, test, and support files +DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \ + wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \ + coderules.doc filelist.doc change.log +MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \ + makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \ + makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \ + maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \ + makvms.opt +CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \ + jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \ + jconfig.vms +CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh +OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm 
+TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ + testimgp.jpg +DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ + $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) +# library object files common to compression and decompression +COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \ + $(SYSDEPSIMDCHK) +# compression library object files +CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \ + jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \ + jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \ + jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \ + jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \ + jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o +# decompression library object files +DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \ + jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \ + jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \ + jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \ + jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \ + jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \ + jdmermmx.o jdmerss2.o +# These objectfiles are included in libjpeg.a +LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) +# object files for sample applications (excluding library files) +COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \ + cdjpeg.o +DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \ + cdjpeg.o +TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o + + +all: libjpeg.a cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe + +libjpeg.a: $(LIBOBJECTS) + -$(RM) libjpeg.a + $(AR) libjpeg.a $(LIBOBJECTS) + $(AR2) libjpeg.a + +cjpeg.exe: $(COBJECTS) libjpeg.a + $(LN) $(LDFLAGS) -o cjpeg.exe $(COBJECTS) libjpeg.a $(LDLIBS) + +djpeg.exe: $(DOBJECTS) 
libjpeg.a + $(LN) $(LDFLAGS) -o djpeg.exe $(DOBJECTS) libjpeg.a $(LDLIBS) + +jpegtran.exe: $(TROBJECTS) libjpeg.a + $(LN) $(LDFLAGS) -o jpegtran.exe $(TROBJECTS) libjpeg.a $(LDLIBS) + +rdjpgcom.exe: rdjpgcom.o + $(LN) $(LDFLAGS) -o rdjpgcom.exe rdjpgcom.o $(LDLIBS) + +wrjpgcom.exe: wrjpgcom.o + $(LN) $(LDFLAGS) -o wrjpgcom.exe wrjpgcom.o $(LDLIBS) + +jconfig.h: jconfig.doc + echo You must prepare a system-dependent jconfig.h file. + echo Please read the installation directions in install.doc. + exit 1 + +clean: + -$(RM) *.o + -$(RM) cjpeg.exe + -$(RM) djpeg.exe + -$(RM) jpegtran.exe + -$(RM) rdjpgcom.exe + -$(RM) wrjpgcom.exe + -$(RM) jsimdcfg.inc + -$(RM) libjpeg.a + -$(RM) testout*.* + +test: cjpeg.exe djpeg.exe jpegtran.exe + -$(RM) testout*.* + ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg + ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg + ./cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm + ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg + ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm + ./jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg + fc /b $(srcdir)\testimg.ppm testout.ppm + fc /b $(srcdir)\testimg.bmp testout.bmp + fc /b $(srcdir)\testimg.jpg testout.jpg + fc /b $(srcdir)\testimg.ppm testoutp.ppm + fc /b $(srcdir)\testimgp.jpg testoutp.jpg + fc /b $(srcdir)\testorig.jpg testoutt.jpg + + +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg.exe $(srcdir)/makecfg.c $(LDLIBS) + .\makecfg.exe > jsimdcfg.inc + -$(RM) makecfg.exe + +%.o : %.asm + $(NASM) $(NAFLAGS) -o $@ $< + +jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.o: 
jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.o: jiss2int.asm 
jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + +jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h 
+jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h +# jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jquant1.o: jquant1.c jinclude.h 
jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h +rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h +wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h +cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h +rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h 
+wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h diff --git a/makefile.mgwdll b/makefile.mgwdll new file mode 100644 index 0000000..08a3e69 --- /dev/null +++ b/makefile.mgwdll @@ -0,0 +1,310 @@ +# Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension + +# This makefile is for MinGW. +# It builds the IJG library as a dynamically linkable library (.DLL), +# and builds the sample applications which are linked against the DLL. + +# Read installation instructions before saying "make" !! + +srcdir = . +VPATH = $(srcdir) + +# The name of your C compiler: +CC= gcc + +# You may need to adjust these cc options: +# For gcc 3.4.x +CFLAGS= -O2 -mtune=pentium2 -march=i386 -fomit-frame-pointer -fweb \ + -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir) +# For gcc 3.3.x +#CFLAGS= -O2 -mcpu=pentium2 -march=i386 -fomit-frame-pointer \ +# -mpreferred-stack-boundary=2 -mno-align-stringops -I$(srcdir) +# Generally, we recommend defining any configuration symbols in jconfig.h, +# NOT via -D switches here. + +# The executable name of NASM and its options: +NASM= nasmw +NAFLAGS= $(NASM_OBJFMT) -I$(srcdir)/ +# object file format specifier for NASM +# see jsimdext.inc for more details. +NASM_OBJFMT= -fwin32 -DWIN32 + +# Link-time cc options: +LDFLAGS= -s +LDFLAGS_DLL= $(LDFLAGS) -shared + +# To link any special libraries, add the necessary -l commands here. +LDLIBS= + +# DLL to build +DLLNAME = jpeg62.dll +# import library +LIBNAME = libjpeg.dll.a + +# Put here the object file name for the correct system-dependent memory +# manager file. 
+SYSDEPMEM= jmemnobs.o + +# OS-dependent SIMD instruction support checker +# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc) +SYSDEPSIMDCHK= jsimdw32.o + +# miscellaneous OS-dependent stuff +# linker +LN= $(CC) +# file deletion command +RM= del +# library (.a) file creation command +AR= ar rc +# second step in .a creation (use "touch" if not needed) +AR2= ranlib + +# End of configurable options. + + +# source files: JPEG library proper +LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \ + jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \ + jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \ + jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \ + jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \ + jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \ + jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \ + jquant2.c jutils.c jmemmgr.c +# memmgr back ends: compile only one of these into a working library +SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c +# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom +APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \ + rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \ + rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c +SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES) +# files included by source files +INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ + jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h +# documentation, test, and support files +DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \ + wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \ + coderules.doc filelist.doc change.log +MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \ + makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \ + makeapps.ds 
makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \ + maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \ + makvms.opt +CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \ + jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \ + jconfig.vms +CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh +OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm +TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ + testimgp.jpg +DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ + $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) +# library object files common to compression and decompression +COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \ + $(SYSDEPSIMDCHK) +# compression library object files +CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \ + jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \ + jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \ + jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \ + jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \ + jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o +# decompression library object files +DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \ + jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \ + jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \ + jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \ + jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \ + jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \ + jdmermmx.o jdmerss2.o +# These object files are linked into the DLL (jpeg62.dll); libjpeg.dll.a is only its import library +LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) +# object files for sample applications (excluding library files) +COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \ + cdjpeg.o +DOBJECTS=
djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \ + cdjpeg.o +TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o + + +all: $(DLLNAME) cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe + +$(LIBNAME): $(DLLNAME) +$(DLLNAME): $(LIBOBJECTS) jpegdll.o jpegdll.def + $(LN) $(LDFLAGS_DLL) -o $(DLLNAME) -Wl,--out-implib,$(LIBNAME) \ + $(LIBOBJECTS) jpegdll.o $(srcdir)/jpegdll.def + +jpegdll.o: jpegdll.rc + windres -O coff -o $@ $< + +cjpeg.exe: $(COBJECTS) $(LIBNAME) + $(LN) $(LDFLAGS) -o cjpeg.exe $(COBJECTS) $(LIBNAME) $(LDLIBS) + +djpeg.exe: $(DOBJECTS) $(LIBNAME) + $(LN) $(LDFLAGS) -o djpeg.exe $(DOBJECTS) $(LIBNAME) $(LDLIBS) + +jpegtran.exe: $(TROBJECTS) $(LIBNAME) + $(LN) $(LDFLAGS) -o jpegtran.exe $(TROBJECTS) $(LIBNAME) $(LDLIBS) + +rdjpgcom.exe: rdjpgcom.o + $(LN) $(LDFLAGS) -o rdjpgcom.exe rdjpgcom.o $(LDLIBS) + +wrjpgcom.exe: wrjpgcom.o + $(LN) $(LDFLAGS) -o wrjpgcom.exe wrjpgcom.o $(LDLIBS) + +jconfig.h: jconfig.doc + echo You must prepare a system-dependent jconfig.h file. + echo Please read the installation directions in install.doc.
+ exit 1 + +clean: + -$(RM) *.o + -$(RM) cjpeg.exe + -$(RM) djpeg.exe + -$(RM) jpegtran.exe + -$(RM) rdjpgcom.exe + -$(RM) wrjpgcom.exe + -$(RM) jsimdcfg.inc + -$(RM) $(DLLNAME) + -$(RM) $(LIBNAME) + -$(RM) testout*.* + +test: cjpeg.exe djpeg.exe jpegtran.exe + -$(RM) testout*.* + ./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)\testorig.jpg + ./djpeg -dct int -bmp -colors 256 -outfile testout.bmp $(srcdir)\testorig.jpg + ./cjpeg -dct int -outfile testout.jpg $(srcdir)\testimg.ppm + ./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)\testprog.jpg + ./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)\testimg.ppm + ./jpegtran -outfile testoutt.jpg $(srcdir)\testprog.jpg + fc /b $(srcdir)\testimg.ppm testout.ppm + fc /b $(srcdir)\testimg.bmp testout.bmp + fc /b $(srcdir)\testimg.jpg testout.jpg + fc /b $(srcdir)\testimg.ppm testoutp.ppm + fc /b $(srcdir)\testimgp.jpg testoutp.jpg + fc /b $(srcdir)\testorig.jpg testoutt.jpg + + +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg.exe $(srcdir)/makecfg.c $(LDLIBS) + .\makecfg.exe > jsimdcfg.inc + $(RM) makecfg.exe + +%.o : %.asm + $(NASM) $(NAFLAGS) -o $@ $< + +jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.o: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.o: jdsamss2.asm jsimdcfg.inc 
jsimdext.inc jcolsamp.inc +jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + +jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcapistd.o: jcapistd.c jinclude.h 
jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdinput.o: jdinput.c jinclude.h jconfig.h 
jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h +# jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h 
jerror.h jmemsys.h +jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h +rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h +wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h +cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h +rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h diff --git a/makefile.unix 
b/makefile.unix index 00455ab..e05ecc0 100644 --- a/makefile.unix +++ b/makefile.unix @@ -1,4 +1,5 @@ # Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension # This makefile is suitable for Unix-like systems with non-ANSI compilers. # If you have an ANSI compiler, makefile.ansi is a better starting point. @@ -15,6 +16,13 @@ CFLAGS= -O # However, any special defines for ansi2knr.c may be included here: ANSI2KNRFLAGS= +# The executable name of NASM and its options: +NASM= nasm +NAFLAGS= $(NASM_OBJFMT) -I./ +# object file format specifier for NASM +# see jsimdext.inc for more details. +NASM_OBJFMT= -faout -DAOUT + # Link-time cc options: LDFLAGS= @@ -26,6 +34,10 @@ LDLIBS= # to use jmemansi.o or jmemname.o if you have limited swap space. SYSDEPMEM= jmemnobs.o +# OS-dependent SIMD instruction support checker +# jsimdw32.o (Win32) / jsimddjg.o (DJGPP V.2) / jsimdgcc.o (Unix/gcc) +SYSDEPSIMDCHK= jsimdgcc.o + # miscellaneous OS-dependent stuff # linker LN= $(CC) @@ -79,17 +91,23 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) # library object files common to compression and decompression -COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) +COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM) jsimdcpu.o \ + $(SYSDEPSIMDCHK) # compression library object files CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \ jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \ - jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \ - jfdctint.o + jcsample.o jchuff.o jcphuff.o jcdctmgr.o jccolmmx.o jccolss2.o \ + jcsammmx.o jcsamss2.o jcqntint.o jcqntflt.o jcqntmmx.o jcqnt3dn.o \ + jcqnts2i.o jcqntsse.o jcqnts2f.o jfdctint.o jfdctfst.o jfdctflt.o \ + jfmmxint.o jfmmxfst.o jf3dnflt.o jfss2int.o jfss2fst.o jfsseflt.o # decompression library object 
files DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \ jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \ - jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \ - jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o + jdpostct.o jddctmgr.o jdsample.o jdcolor.o jquant1.o jquant2.o \ + jdmerge.o jidctint.o jidctfst.o jidctred.o jidctflt.o jimmxint.o \ + jimmxfst.o jimmxred.o ji3dnflt.o jiss2int.o jiss2fst.o jiss2red.o \ + jisseflt.o jiss2flt.o jdsammmx.o jdsamss2.o jdcolmmx.o jdcolss2.o \ + jdmermmx.o jdmerss2.o # These objectfiles are included in libjpeg.a LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) # object files for sample applications (excluding library files) @@ -139,7 +157,7 @@ jconfig.h: jconfig.doc clean: $(RM) *.o cjpeg djpeg jpegtran libjpeg.a rdjpgcom wrjpgcom - $(RM) ansi2knr core testout* + $(RM) jsimdcfg.inc ansi2knr core testout* test: cjpeg djpeg jpegtran $(RM) testout* @@ -157,10 +175,63 @@ test: cjpeg djpeg jpegtran cmp testorig.jpg testoutt.jpg +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) $(LDFLAGS) -o makecfg ./makecfg.c $(LDLIBS) + ./makecfg > jsimdcfg.inc + $(RM) ./makecfg + +.asm.o: + $(NASM) $(NAFLAGS) -o $@ $*.asm + +jsimdcpu.o: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.o: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.o: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.o: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.o: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.o: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.o: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.o: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.o: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.o: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.o: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.o: jdsammmx.asm jsimdcfg.inc 
jsimdext.inc jcolsamp.inc +jdsamss2.o: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.o: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.o: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.o: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.o: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.o: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.o: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.o: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.o: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.o: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.o: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.o: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.o: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.o: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.o: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.o: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.o: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.o: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.o: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.o: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.o: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.o: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.o: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.o: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.o: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.o: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.o: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.o: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.o: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.o: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.o: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h 
jmorecfg.h jpegint.h jerror.h jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h @@ -171,33 +242,33 @@ jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror. jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h 
jerror.h jdhuff.h jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h -jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.o: jidctflt.c 
jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h diff --git a/makefile.vc b/makefile.vc index 2acf069..7cfeda3 100644 --- a/makefile.vc +++ b/makefile.vc @@ -1,32 +1,50 @@ # Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension # This makefile is for Microsoft Visual C++ on Windows NT (and 95?). # It builds the IJG library as a statically linkable library (.LIB), # and builds the sample applications as console-mode apps. -# Thanks to Xingong Chang, Raymond Everly and others. # Read installation instructions before saying "nmake" !! -# To build an optimized library without debug info, say "nmake nodebug=1". -# Pull in standard variable definitions -!include +# The name of your C compiler: +CC= cl +LD= link # You may want to adjust these compiler options: -CFLAGS= $(cflags) $(cdebug) $(cvars) -I. +!ifdef crtdll +# (DLL version of CRT) +CFLAGS= -nologo -c -MD -W3 -O2 -GF -Gy -DNDEBUG -I. +!else +# (Single threaded static CRT) +CFLAGS= -nologo -c -ML -W3 -O2 -GF -Gy -DNDEBUG -I. +!endif + # Generally, we recommend defining any configuration symbols in jconfig.h, # NOT via -D switches here. +# The executable name of NASM and its options: +NASM= nasmw +NAFLAGS= $(NASM_OBJFMT) -I./ +# object file format specifier for NASM +# see jsimdext.inc for more details. 
+NASM_OBJFMT= -fwin32 -DWIN32 + # Link-time options: -LDFLAGS= $(ldebug) $(conlflags) +LDFLAGS= -nologo -release -subsystem:console,4.0 -opt:nowin98 # To link any special libraries, add the necessary commands here. -LDLIBS= $(conlibs) +LDLIBS= # Put here the object file name for the correct system-dependent memory # manager file. For NT we suggest jmemnobs.obj, which expects the OS to # provide adequate virtual memory. SYSDEPMEM= jmemnobs.obj +# OS-dependent SIMD instruction support checker +# jsimdw32.obj (Win32) / jsimddjg.obj (DJGPP V.2) / jsimdgcc.obj (Unix/gcc) +SYSDEPSIMDCHK= jsimdw32.obj + # miscellaneous OS-dependent stuff # file deletion command RM= del @@ -72,18 +90,26 @@ TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) # library object files common to compression and decompression -COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM) +COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM) \ + jsimdcpu.obj $(SYSDEPSIMDCHK) # compression library object files CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \ jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \ jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \ - jcdctmgr.obj jfdctfst.obj jfdctflt.obj jfdctint.obj + jcdctmgr.obj jccolmmx.obj jccolss2.obj jcsammmx.obj jcsamss2.obj \ + jcqntint.obj jcqntflt.obj jcqntmmx.obj jcqnt3dn.obj jcqnts2i.obj \ + jcqntsse.obj jcqnts2f.obj jfdctint.obj jfdctfst.obj jfdctflt.obj \ + jfmmxint.obj jfmmxfst.obj jf3dnflt.obj jfss2int.obj jfss2fst.obj \ + jfsseflt.obj # decompression library object files DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \ jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \ - jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jidctfst.obj \ - jidctflt.obj jidctint.obj jidctred.obj jdsample.obj 
jdcolor.obj \ - jquant1.obj jquant2.obj jdmerge.obj + jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jdsample.obj \ + jdcolor.obj jquant1.obj jquant2.obj jdmerge.obj jidctint.obj \ + jidctfst.obj jidctred.obj jidctflt.obj jimmxint.obj jimmxfst.obj \ + jimmxred.obj ji3dnflt.obj jiss2int.obj jiss2fst.obj jiss2red.obj \ + jisseflt.obj jiss2flt.obj jdsammmx.obj jdsamss2.obj jdcolmmx.obj \ + jdcolss2.obj jdmermmx.obj jdmerss2.obj # These objectfiles are included in libjpeg.lib LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) # object files for sample applications (excluding library files) @@ -94,38 +120,46 @@ DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \ TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj # Template command for compiling .c to .obj -.c.obj: - $(cc) $(CFLAGS) $*.c +.c.obj:: + $(CC) $(CFLAGS) $< all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe libjpeg.lib: $(LIBOBJECTS) - $(RM) libjpeg.lib + -$(RM) libjpeg.lib lib -out:libjpeg.lib $(LIBOBJECTS) cjpeg.exe: $(COBJECTS) libjpeg.lib - $(link) $(LDFLAGS) -out:cjpeg.exe $(COBJECTS) libjpeg.lib $(LDLIBS) + $(LD) $(LDFLAGS) -out:cjpeg.exe $(COBJECTS) libjpeg.lib $(LDLIBS) djpeg.exe: $(DOBJECTS) libjpeg.lib - $(link) $(LDFLAGS) -out:djpeg.exe $(DOBJECTS) libjpeg.lib $(LDLIBS) + $(LD) $(LDFLAGS) -out:djpeg.exe $(DOBJECTS) libjpeg.lib $(LDLIBS) jpegtran.exe: $(TROBJECTS) libjpeg.lib - $(link) $(LDFLAGS) -out:jpegtran.exe $(TROBJECTS) libjpeg.lib $(LDLIBS) + $(LD) $(LDFLAGS) -out:jpegtran.exe $(TROBJECTS) libjpeg.lib $(LDLIBS) rdjpgcom.exe: rdjpgcom.obj - $(link) $(LDFLAGS) -out:rdjpgcom.exe rdjpgcom.obj $(LDLIBS) + $(LD) $(LDFLAGS) -out:rdjpgcom.exe rdjpgcom.obj $(LDLIBS) wrjpgcom.exe: wrjpgcom.obj - $(link) $(LDFLAGS) -out:wrjpgcom.exe wrjpgcom.obj $(LDLIBS) + $(LD) $(LDFLAGS) -out:wrjpgcom.exe wrjpgcom.obj $(LDLIBS) clean: - $(RM) *.obj *.exe libjpeg.lib - $(RM) testout* + -$(RM) *.obj + -$(RM) cjpeg.exe + -$(RM) djpeg.exe + -$(RM) 
jpegtran.exe + -$(RM) rdjpgcom.exe + -$(RM) wrjpgcom.exe + -$(RM) jsimdcfg.inc + -$(RM) libjpeg.lib + -if exist *.manifest $(RM) *.manifest + -if exist testout* $(RM) testout* test: cjpeg.exe djpeg.exe jpegtran.exe - $(RM) testout* + -if exist testout* $(RM) testout* .\djpeg -dct int -ppm -outfile testout.ppm testorig.jpg .\djpeg -dct int -bmp -colors 256 -outfile testout.bmp testorig.jpg .\cjpeg -dct int -outfile testout.jpg testimg.ppm @@ -140,10 +174,66 @@ test: cjpeg.exe djpeg.exe jpegtran.exe fc /b testorig.jpg testoutt.jpg +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) makecfg.c + $(LD) $(LDFLAGS) -out:makecfg.exe makecfg.obj $(LDLIBS) + .\makecfg.exe > jsimdcfg.inc + $(RM) makecfg.obj + $(RM) makecfg.exe + if exist makecfg.exe.manifest $(RM) makecfg.exe.manifest + +.asm.obj: + $(NASM) $(NAFLAGS) -o $@ $< + +jsimdcpu.obj: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.obj: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.obj: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.obj: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.obj: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.obj: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.obj: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.obj: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.obj: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.obj: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.obj: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.obj: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.obj: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.obj: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.obj: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.obj: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.obj: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.obj: jcqnts2i.asm 
jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.obj: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.obj: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.obj: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.obj: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.obj: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.obj: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.obj: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jf3dnflt.obj: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.obj: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.obj: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.obj: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.obj: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.obj: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.obj: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.obj: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.obj: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.obj: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.obj: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.obj: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.obj: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.obj: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.obj: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.obj: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.obj: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.obj: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.obj: jccolor.c 
jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h @@ -154,33 +244,33 @@ jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerro jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h 
jerror.h jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h -jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h -jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h -jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.obj: jidctint.c jinclude.h jconfig.h 
jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h diff --git a/makefile.vcdll b/makefile.vcdll new file mode 100644 index 0000000..cd715eb --- /dev/null +++ b/makefile.vcdll @@ -0,0 +1,311 @@ +# Makefile for Independent JPEG Group's software +# Modified for x86 SIMD extension + +# This makefile is for Microsoft Visual C++ 6.0. +# It builds the IJG library as a dynamically linkable library (.DLL), +# and builds the sample applications which are linked against the DLL. + +# Read installation instructions before saying "nmake" !! + +# The name of your C compiler: +CC= cl +LD= link +RC= rc + +# You may want to adjust these compiler options: +# You have to use a DLL version of C Run-Time library for both +# the JPEG DLL and any applications linked to the JPEG DLL. +CFLAGS= -nologo -c -MD -W3 -O2 -GF -Gy -DNDEBUG -I. + +# Generally, we recommend defining any configuration symbols in jconfig.h, +# NOT via -D switches here. + +# The executable name of NASM and its options: +NASM= nasmw +NAFLAGS= $(NASM_OBJFMT) -I./ +# object file format specifier for NASM +# see jsimdext.inc for more details. +NASM_OBJFMT= -fwin32 -DWIN32 + +# Link-time options: +LDFLAGS= -nologo -release -subsystem:console,4.0 -opt:nowin98 +LDFLAGS_DLL= -nologo -release -dll -opt:nowin98 + +# To link any special libraries, add the necessary commands here. +LDLIBS= + +# DLL to build +DLLNAME = jpeg62.dll +# import library +LIBNAME = jpeg62.lib + +# Put here the object file name for the correct system-dependent memory +# manager file. For NT we suggest jmemnobs.obj, which expects the OS to +# provide adequate virtual memory. 
+SYSDEPMEM= jmemnobs.obj + +# OS-dependent SIMD instruction support checker +# jsimdw32.obj (Win32) / jsimddjg.obj (DJGPP V.2) / jsimdgcc.obj (Unix/gcc) +SYSDEPSIMDCHK= jsimdw32.obj + +# miscellaneous OS-dependent stuff +# file deletion command +RM= del + +# End of configurable options. + + +# source files: JPEG library proper +LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \ + jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \ + jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \ + jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \ + jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \ + jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \ + jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \ + jquant2.c jutils.c jmemmgr.c +# memmgr back ends: compile only one of these into a working library +SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c +# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom +APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \ + rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \ + rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c +SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES) +# files included by source files +INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ + jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h +# documentation, test, and support files +DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \ + wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \ + coderules.doc filelist.doc change.log +MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \ + makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \ + makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \ + maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \ + makvms.opt 
+CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \ + jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \ + jconfig.vms +CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh +OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm +TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \ + testimgp.jpg +DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \ + $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES) +# library object files common to compression and decompression +COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM) \ + jsimdcpu.obj $(SYSDEPSIMDCHK) +# compression library object files +CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \ + jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \ + jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \ + jcdctmgr.obj jccolmmx.obj jccolss2.obj jcsammmx.obj jcsamss2.obj \ + jcqntint.obj jcqntflt.obj jcqntmmx.obj jcqnt3dn.obj jcqnts2i.obj \ + jcqntsse.obj jcqnts2f.obj jfdctint.obj jfdctfst.obj jfdctflt.obj \ + jfmmxint.obj jfmmxfst.obj jf3dnflt.obj jfss2int.obj jfss2fst.obj \ + jfsseflt.obj +# decompression library object files +DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \ + jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \ + jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jdsample.obj \ + jdcolor.obj jquant1.obj jquant2.obj jdmerge.obj jidctint.obj \ + jidctfst.obj jidctred.obj jidctflt.obj jimmxint.obj jimmxfst.obj \ + jimmxred.obj ji3dnflt.obj jiss2int.obj jiss2fst.obj jiss2red.obj \ + jisseflt.obj jiss2flt.obj jdsammmx.obj jdsamss2.obj jdcolmmx.obj \ + jdcolss2.obj jdmermmx.obj jdmerss2.obj +# These objectfiles are included in libjpeg.lib +LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS) +# object files for sample applications (excluding library files) +COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj 
rdbmp.obj \ + rdswitch.obj cdjpeg.obj +DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \ + rdcolmap.obj cdjpeg.obj +TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj + +# Template command for compiling .c to .obj +.c.obj:: + $(CC) $(CFLAGS) $< + + +all: $(DLLNAME) cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe + +$(LIBNAME): $(DLLNAME) +$(DLLNAME): $(LIBOBJECTS) jpegdll.res jpegdll.def + $(LD) $(LDFLAGS_DLL) -out:$(DLLNAME) -implib:$(LIBNAME) \ + $(LIBOBJECTS) jpegdll.res -def:jpegdll.def + +jpegdll.res: jpegdll.rc + $(RC) -fo $@ $*.rc + +cjpeg.exe: $(COBJECTS) $(LIBNAME) + $(LD) $(LDFLAGS) -out:cjpeg.exe $(COBJECTS) $(LIBNAME) $(LDLIBS) + +djpeg.exe: $(DOBJECTS) $(LIBNAME) + $(LD) $(LDFLAGS) -out:djpeg.exe $(DOBJECTS) $(LIBNAME) $(LDLIBS) + +jpegtran.exe: $(TROBJECTS) $(LIBNAME) + $(LD) $(LDFLAGS) -out:jpegtran.exe $(TROBJECTS) $(LIBNAME) $(LDLIBS) + +rdjpgcom.exe: rdjpgcom.obj + $(LD) $(LDFLAGS) -out:rdjpgcom.exe rdjpgcom.obj $(LDLIBS) + +wrjpgcom.exe: wrjpgcom.obj + $(LD) $(LDFLAGS) -out:wrjpgcom.exe wrjpgcom.obj $(LDLIBS) + + +clean: + -$(RM) *.obj + -$(RM) cjpeg.exe + -$(RM) djpeg.exe + -$(RM) jpegtran.exe + -$(RM) rdjpgcom.exe + -$(RM) wrjpgcom.exe + -$(RM) jsimdcfg.inc + -$(RM) jpegdll.res + -$(RM) $(DLLNAME) + -$(RM) $(DLLNAME:.dll=.exp) + -$(RM) $(LIBNAME) + -if exist *.manifest $(RM) *.manifest + -if exist testout* $(RM) testout* + +test: cjpeg.exe djpeg.exe jpegtran.exe + -if exist testout* $(RM) testout* + .\djpeg -dct int -ppm -outfile testout.ppm testorig.jpg + .\djpeg -dct int -bmp -colors 256 -outfile testout.bmp testorig.jpg + .\cjpeg -dct int -outfile testout.jpg testimg.ppm + .\djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg + .\cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm + .\jpegtran -outfile testoutt.jpg testprog.jpg + fc /b testimg.ppm testout.ppm + fc /b testimg.bmp testout.bmp + fc /b testimg.jpg testout.jpg + fc /b testimg.ppm testoutp.ppm + fc /b testimgp.jpg 
testoutp.jpg + fc /b testorig.jpg testoutt.jpg + + +jsimdcfg.inc: makecfg.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h + $(CC) $(CFLAGS) makecfg.c + $(LD) $(LDFLAGS) -out:makecfg.exe makecfg.obj $(LDLIBS) + .\makecfg.exe > jsimdcfg.inc + $(RM) makecfg.obj + $(RM) makecfg.exe + if exist makecfg.exe.manifest $(RM) makecfg.exe.manifest + +.asm.obj: + $(NASM) $(NAFLAGS) -o $@ $< + +jsimdcpu.obj: jsimdcpu.asm jsimdcfg.inc jsimdext.inc +jsimdw32.obj: jsimdw32.asm jsimdcfg.inc jsimdext.inc +jsimddjg.obj: jsimddjg.asm jsimdcfg.inc jsimdext.inc +jccolmmx.obj: jccolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jccolss2.obj: jccolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsammmx.obj: jcsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcsamss2.obj: jcsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolmmx.obj: jdcolmmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdcolss2.obj: jdcolss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmermmx.obj: jdmermmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdmerss2.obj: jdmerss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsammmx.obj: jdsammmx.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jdsamss2.obj: jdsamss2.asm jsimdcfg.inc jsimdext.inc jcolsamp.inc +jcqntint.obj: jcqntint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntflt.obj: jcqntflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntmmx.obj: jcqntmmx.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnt3dn.obj: jcqnt3dn.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2i.obj: jcqnts2i.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqntsse.obj: jcqntsse.asm jsimdcfg.inc jsimdext.inc jdct.inc +jcqnts2f.obj: jcqnts2f.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctint.obj: jfdctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctfst.obj: jfdctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfdctflt.obj: jfdctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxint.obj: jfmmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfmmxfst.obj: jfmmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc 
+jf3dnflt.obj: jf3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2int.obj: jfss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfss2fst.obj: jfss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jfsseflt.obj: jfsseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctint.obj: jidctint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctfst.obj: jidctfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctred.obj: jidctred.asm jsimdcfg.inc jsimdext.inc jdct.inc +jidctflt.obj: jidctflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxint.obj: jimmxint.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxfst.obj: jimmxfst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jimmxred.obj: jimmxred.asm jsimdcfg.inc jsimdext.inc jdct.inc +ji3dnflt.obj: ji3dnflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2int.obj: jiss2int.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2fst.obj: jiss2fst.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2red.obj: jiss2red.asm jsimdcfg.inc jsimdext.inc jdct.inc +jisseflt.obj: jisseflt.asm jsimdcfg.inc jsimdext.inc jdct.inc +jiss2flt.obj: jiss2flt.asm jsimdcfg.inc jsimdext.inc jdct.inc + +jsimdgcc.obj: jsimdgcc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h + +jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcmaster.obj: jcmaster.c jinclude.h jconfig.h 
jpeglib.h jmorecfg.h jpegint.h jerror.h +jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h +jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h +jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h +jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h +jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jcolsamp.h 
+jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h +# jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +# jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h +jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h +jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h +cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h +jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h 
+rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h +wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h +cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h +rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h +wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h diff --git a/nasm_lt.sh b/nasm_lt.sh new file mode 100644 index 0000000..ef5a591 --- /dev/null +++ b/nasm_lt.sh @@ -0,0 +1,57 @@ +#! /bin/sh +command="" +infile="" +o_opt=no +pic=no +while [ $# -gt 0 ]; do + case "$1" in + -DPIC|-fPIC|-fpic) + if [ "$pic" != "yes" ] ; then + command="$command -DPIC" + pic=yes + fi + ;; + -f|-fbin|-faout|-faoutb|-fcoff|-felf|-fas86| \ + -fobj|-fwin32|-frdf|-fieee|-fmacho) + # it's a file format specifier for nasm. + command="$command $1" + ;; + -f*) + # maybe a code-generation flag for gcc. 
+ ;; + -[Ii]*) + incdir=`echo "$1" | sed 's/^-[Ii]//'` + if [ "x$incdir" = x -a "x$2" != x ] ; then + case "$2" in + -*) ;; + *) incdir="$2"; shift;; + esac + fi + if [ "x$incdir" != x ] ; then + # In the case of NASM, the trailing slash is necessary. + incdir=`echo "$incdir" | sed 's%/*$%/%'` + command="$command -I$incdir" + fi + ;; + -o*) + o_opt=yes + command="$command $1" + ;; + *.asm) + infile=$1 + command="$command $1" + ;; + *) + command="$command $1" + ;; + esac + shift +done +if [ "$o_opt" != yes ] ; then + # By default, NASM creates an output file + # in the same directory as the input file. + outfile="-o `echo $infile | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.o" + command="$command $outfile" +fi +echo $command +exec $command diff --git a/rdbmp.c b/rdbmp.c index b05fe2a..2245847 100644 --- a/rdbmp.c +++ b/rdbmp.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified to improve performance. + * Last Modified : October 19, 2004 + * --------------------------------------------------------------------- + * * This file contains routines to read input images in Microsoft "BMP" * format (MS Windows 3.x, OS/2 1.x, and OS/2 2.x flavors). 
* Currently, only 8-bit and 24-bit images are supported, not 1-bit or @@ -187,11 +194,14 @@ METHODDEF(JDIMENSION) preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { bmp_source_ptr source = (bmp_source_ptr) sinfo; +#if (BITS_IN_JSAMPLE != 8) || defined(NEED_FAR_POINTERS) register FILE *infile = source->pub.input_file; register int c; register JSAMPROW out_ptr; + JDIMENSION col; +#endif + JDIMENSION row; JSAMPARRAY image_ptr; - JDIMENSION row, col; cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; /* Read the data into a virtual array in input-file row order. */ @@ -204,6 +214,10 @@ preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) image_ptr = (*cinfo->mem->access_virt_sarray) ((j_common_ptr) cinfo, source->whole_image, row, (JDIMENSION) 1, TRUE); +#if (BITS_IN_JSAMPLE == 8) && !defined(NEED_FAR_POINTERS) + if (! ReadOK(source->pub.input_file, image_ptr[0], source->row_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); +#else out_ptr = image_ptr[0]; for (col = source->row_width; col > 0; col--) { /* inline copy of read_byte() for speed */ @@ -211,6 +225,7 @@ preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) ERREXIT(cinfo, JERR_INPUT_EOF); *out_ptr++ = (JSAMPLE) c; } +#endif } if (progress != NULL) progress->completed_extra_passes++; diff --git a/rdgif.c b/rdgif.c index b27c167..0da2515 100644 --- a/rdgif.c +++ b/rdgif.c @@ -1,19 +1,39 @@ /* * rdgif.c * - * Copyright (C) 1991-1997, Thomas G. Lane. + * Copyright (C) 1991-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * + ************************************************************************** + * WARNING: You will need an LZW patent license from Unisys in order to * + * use this file legally in any commercial or shareware application. 
* + ************************************************************************** + * * This file contains routines to read input images in GIF format. * - ***************************************************************************** - * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * - * the ability to read GIF files has been removed from the IJG distribution. * - * Sorry about that. * - ***************************************************************************** + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume input from + * an ordinary stdio stream. They further assume that reading begins + * at the start of the file; input_init may need work if the + * user interface has already read some data (e.g., to determine that + * the file is indeed GIF format). + */ + +/* + * This code is loosely based on giftoppm from the PBMPLUS distribution + * of Feb. 1991. That file contains the following copyright notice: + * +-------------------------------------------------------------------+ + * | Copyright 1990, David Koblas. | + * | Permission to use, copy, modify, and distribute this software | + * | and its documentation for any purpose and without fee is hereby | + * | granted, provided that the above copyright notice appear in all | + * | copies and that both that copyright notice and this permission | + * | notice appear in supporting documentation. This software is | + * | provided "as is" without express or implied warranty. | + * +-------------------------------------------------------------------+ * - * We are required to state that + * We are also required to state that * "The Graphics Interchange Format(c) is the Copyright property of * CompuServe Incorporated. GIF(sm) is a Service Mark property of * CompuServe Incorporated." 
@@ -23,6 +43,622 @@ #ifdef GIF_SUPPORTED + +#define MAXCOLORMAPSIZE 256 /* max # of colors in a GIF colormap */ +#define NUMCOLORS 3 /* # of colors */ +#define CM_RED 0 /* color component numbers */ +#define CM_GREEN 1 +#define CM_BLUE 2 + +#define MAX_LZW_BITS 12 /* maximum LZW code size */ +#define LZW_TABLE_SIZE (1< table of prefix symbols */ + UINT8 FAR *symbol_tail; /* => table of suffix bytes */ + UINT8 FAR *symbol_stack; /* => stack for symbol expansions */ + UINT8 FAR *sp; /* stack pointer */ + + /* State for interlaced image processing */ + boolean is_interlaced; /* TRUE if have interlaced image */ + jvirt_sarray_ptr interlaced_image; /* full image in interlaced order */ + JDIMENSION cur_row_number; /* need to know actual row number */ + JDIMENSION pass2_offset; /* # of pixel rows in pass 1 */ + JDIMENSION pass3_offset; /* # of pixel rows in passes 1&2 */ + JDIMENSION pass4_offset; /* # of pixel rows in passes 1,2,3 */ +} gif_source_struct; + +typedef gif_source_struct * gif_source_ptr; + + +/* Forward declarations */ +METHODDEF(JDIMENSION) get_pixel_rows + JPP((j_compress_ptr cinfo, cjpeg_source_ptr sinfo)); +METHODDEF(JDIMENSION) load_interlaced_image + JPP((j_compress_ptr cinfo, cjpeg_source_ptr sinfo)); +METHODDEF(JDIMENSION) get_interlaced_row + JPP((j_compress_ptr cinfo, cjpeg_source_ptr sinfo)); + + +LOCAL(int) +ReadByte (gif_source_ptr sinfo) +/* Read next byte from GIF file */ +{ + register FILE * infile = sinfo->pub.input_file; + int c; + + if ((c = getc(infile)) == EOF) + ERREXIT(sinfo->cinfo, JERR_INPUT_EOF); + return c; +} + + +LOCAL(int) +GetDataBlock (gif_source_ptr sinfo, char *buf) +/* Read a GIF data block, which has a leading count byte */ +/* A zero-length block marks the end of a data block sequence */ +{ + int count; + + count = ReadByte(sinfo); + if (count > 0) { + if (! 
ReadOK(sinfo->pub.input_file, buf, count)) + ERREXIT(sinfo->cinfo, JERR_INPUT_EOF); + } + return count; +} + + +LOCAL(void) +SkipDataBlocks (gif_source_ptr sinfo) +/* Skip a series of data blocks, until a block terminator is found */ +{ + char buf[256]; + + while (GetDataBlock(sinfo, buf) > 0) + /* skip */; +} + + +LOCAL(void) +ReInitLZW (gif_source_ptr sinfo) +/* (Re)initialize LZW state; shared code for startup and Clear processing */ +{ + sinfo->code_size = sinfo->input_code_size + 1; + sinfo->limit_code = sinfo->clear_code << 1; /* 2^code_size */ + sinfo->max_code = sinfo->clear_code + 2; /* first unused code value */ + sinfo->sp = sinfo->symbol_stack; /* init stack to empty */ +} + + +LOCAL(void) +InitLZWCode (gif_source_ptr sinfo) +/* Initialize for a series of LZWReadByte (and hence GetCode) calls */ +{ + /* GetCode initialization */ + sinfo->last_byte = 2; /* make safe to "recopy last two bytes" */ + sinfo->last_bit = 0; /* nothing in the buffer */ + sinfo->cur_bit = 0; /* force buffer load on first call */ + sinfo->out_of_blocks = FALSE; + + /* LZWReadByte initialization: */ + /* compute special code values (note that these do not change later) */ + sinfo->clear_code = 1 << sinfo->input_code_size; + sinfo->end_code = sinfo->clear_code + 1; + sinfo->first_time = TRUE; + ReInitLZW(sinfo); +} + + +LOCAL(int) +GetCode (gif_source_ptr sinfo) +/* Fetch the next code_size bits from the GIF data */ +/* We assume code_size is less than 16 */ +{ + register INT32 accum; + int offs, ret, count; + + while ( (sinfo->cur_bit + sinfo->code_size) > sinfo->last_bit) { + /* Time to reload the buffer */ + if (sinfo->out_of_blocks) { + WARNMS(sinfo->cinfo, JWRN_GIF_NOMOREDATA); + return sinfo->end_code; /* fake something useful */ + } + /* preserve last two bytes of what we have -- assume code_size <= 16 */ + sinfo->code_buf[0] = sinfo->code_buf[sinfo->last_byte-2]; + sinfo->code_buf[1] = sinfo->code_buf[sinfo->last_byte-1]; + /* Load more bytes; set flag if we reach the 
terminator block */ + if ((count = GetDataBlock(sinfo, &sinfo->code_buf[2])) == 0) { + sinfo->out_of_blocks = TRUE; + WARNMS(sinfo->cinfo, JWRN_GIF_NOMOREDATA); + return sinfo->end_code; /* fake something useful */ + } + /* Reset counters */ + sinfo->cur_bit = (sinfo->cur_bit - sinfo->last_bit) + 16; + sinfo->last_byte = 2 + count; + sinfo->last_bit = sinfo->last_byte * 8; + } + + /* Form up next 24 bits in accum */ + offs = sinfo->cur_bit >> 3; /* byte containing cur_bit */ +#ifdef CHAR_IS_UNSIGNED + accum = sinfo->code_buf[offs+2]; + accum <<= 8; + accum |= sinfo->code_buf[offs+1]; + accum <<= 8; + accum |= sinfo->code_buf[offs]; +#else + accum = sinfo->code_buf[offs+2] & 0xFF; + accum <<= 8; + accum |= sinfo->code_buf[offs+1] & 0xFF; + accum <<= 8; + accum |= sinfo->code_buf[offs] & 0xFF; +#endif + + /* Right-align cur_bit in accum, then mask off desired number of bits */ + accum >>= (sinfo->cur_bit & 7); + ret = ((int) accum) & ((1 << sinfo->code_size) - 1); + + sinfo->cur_bit += sinfo->code_size; + return ret; +} + + +LOCAL(int) +LZWReadByte (gif_source_ptr sinfo) +/* Read an LZW-compressed byte */ +{ + register int code; /* current working code */ + int incode; /* saves actual input code */ + + /* First time, just eat the expected Clear code(s) and return next code, */ + /* which is expected to be a raw byte. */ + if (sinfo->first_time) { + sinfo->first_time = FALSE; + code = sinfo->clear_code; /* enables sharing code with Clear case */ + } else { + + /* If any codes are stacked from a previously read symbol, return them */ + if (sinfo->sp > sinfo->symbol_stack) + return (int) *(-- sinfo->sp); + + /* Time to read a new symbol */ + code = GetCode(sinfo); + + } + + if (code == sinfo->clear_code) { + /* Reinit state, swallow any extra Clear codes, and */ + /* return next code, which is expected to be a raw byte. 
*/ + ReInitLZW(sinfo); + do { + code = GetCode(sinfo); + } while (code == sinfo->clear_code); + if (code > sinfo->clear_code) { /* make sure it is a raw byte */ + WARNMS(sinfo->cinfo, JWRN_GIF_BADDATA); + code = 0; /* use something valid */ + } + /* make firstcode, oldcode valid! */ + sinfo->firstcode = sinfo->oldcode = code; + return code; + } + + if (code == sinfo->end_code) { + /* Skip the rest of the image, unless GetCode already read terminator */ + if (! sinfo->out_of_blocks) { + SkipDataBlocks(sinfo); + sinfo->out_of_blocks = TRUE; + } + /* Complain that there's not enough data */ + WARNMS(sinfo->cinfo, JWRN_GIF_ENDCODE); + /* Pad data with 0's */ + return 0; /* fake something usable */ + } + + /* Got normal raw byte or LZW symbol */ + incode = code; /* save for a moment */ + + if (code >= sinfo->max_code) { /* special case for not-yet-defined symbol */ + /* code == max_code is OK; anything bigger is bad data */ + if (code > sinfo->max_code) { + WARNMS(sinfo->cinfo, JWRN_GIF_BADDATA); + incode = 0; /* prevent creation of loops in symbol table */ + } + /* this symbol will be defined as oldcode/firstcode */ + *(sinfo->sp++) = (UINT8) sinfo->firstcode; + code = sinfo->oldcode; + } + + /* If it's a symbol, expand it into the stack */ + while (code >= sinfo->clear_code) { + *(sinfo->sp++) = sinfo->symbol_tail[code]; /* tail is a byte value */ + code = sinfo->symbol_head[code]; /* head is another LZW symbol */ + } + /* At this point code just represents a raw byte */ + sinfo->firstcode = code; /* save for possible future use */ + + /* If there's room in table, */ + if ((code = sinfo->max_code) < LZW_TABLE_SIZE) { + /* Define a new symbol = prev sym + head of this sym's expansion */ + sinfo->symbol_head[code] = sinfo->oldcode; + sinfo->symbol_tail[code] = (UINT8) sinfo->firstcode; + sinfo->max_code++; + /* Is it time to increase code_size? 
*/ + if ((sinfo->max_code >= sinfo->limit_code) && + (sinfo->code_size < MAX_LZW_BITS)) { + sinfo->code_size++; + sinfo->limit_code <<= 1; /* keep equal to 2^code_size */ + } + } + + sinfo->oldcode = incode; /* save last input symbol for future use */ + return sinfo->firstcode; /* return first byte of symbol's expansion */ +} + + +LOCAL(void) +ReadColorMap (gif_source_ptr sinfo, int cmaplen, JSAMPARRAY cmap) +/* Read a GIF colormap */ +{ + int i; + + for (i = 0; i < cmaplen; i++) { +#if BITS_IN_JSAMPLE == 8 +#define UPSCALE(x) (x) +#else +#define UPSCALE(x) ((x) << (BITS_IN_JSAMPLE-8)) +#endif + cmap[CM_RED][i] = (JSAMPLE) UPSCALE(ReadByte(sinfo)); + cmap[CM_GREEN][i] = (JSAMPLE) UPSCALE(ReadByte(sinfo)); + cmap[CM_BLUE][i] = (JSAMPLE) UPSCALE(ReadByte(sinfo)); + } +} + + +LOCAL(void) +DoExtension (gif_source_ptr sinfo) +/* Process an extension block */ +/* Currently we ignore 'em all */ +{ + int extlabel; + + /* Read extension label byte */ + extlabel = ReadByte(sinfo); + TRACEMS1(sinfo->cinfo, 1, JTRC_GIF_EXTENSION, extlabel); + /* Skip the data block(s) associated with the extension */ + SkipDataBlocks(sinfo); +} + + +/* + * Read the file header; return image size and component count. + */ + +METHODDEF(void) +start_input_gif (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + gif_source_ptr source = (gif_source_ptr) sinfo; + char hdrbuf[10]; /* workspace for reading control blocks */ + unsigned int width, height; /* image dimensions */ + int colormaplen, aspectRatio; + int c; + + /* Allocate space to store the colormap */ + source->colormap = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) MAXCOLORMAPSIZE, (JDIMENSION) NUMCOLORS); + + /* Read and verify GIF Header */ + if (! ReadOK(source->pub.input_file, hdrbuf, 6)) + ERREXIT(cinfo, JERR_GIF_NOT); + if (hdrbuf[0] != 'G' || hdrbuf[1] != 'I' || hdrbuf[2] != 'F') + ERREXIT(cinfo, JERR_GIF_NOT); + /* Check for expected version numbers. 
+ * If unknown version, give warning and try to process anyway; + * this is per recommendation in GIF89a standard. + */ + if ((hdrbuf[3] != '8' || hdrbuf[4] != '7' || hdrbuf[5] != 'a') && + (hdrbuf[3] != '8' || hdrbuf[4] != '9' || hdrbuf[5] != 'a')) + TRACEMS3(cinfo, 1, JTRC_GIF_BADVERSION, hdrbuf[3], hdrbuf[4], hdrbuf[5]); + + /* Read and decipher Logical Screen Descriptor */ + if (! ReadOK(source->pub.input_file, hdrbuf, 7)) + ERREXIT(cinfo, JERR_INPUT_EOF); + width = LM_to_uint(hdrbuf[0],hdrbuf[1]); + height = LM_to_uint(hdrbuf[2],hdrbuf[3]); + colormaplen = 2 << (hdrbuf[4] & 0x07); + /* we ignore the color resolution, sort flag, and background color index */ + aspectRatio = hdrbuf[6] & 0xFF; + if (aspectRatio != 0 && aspectRatio != 49) + TRACEMS(cinfo, 1, JTRC_GIF_NONSQUARE); + + /* Read global colormap if header indicates it is present */ + if (BitSet(hdrbuf[4], COLORMAPFLAG)) + ReadColorMap(source, colormaplen, source->colormap); + + /* Scan until we reach start of desired image. + * We don't currently support skipping images, but could add it easily. + */ + for (;;) { + c = ReadByte(source); + + if (c == ';') /* GIF terminator?? */ + ERREXIT(cinfo, JERR_GIF_IMAGENOTFOUND); + + if (c == '!') { /* Extension */ + DoExtension(source); + continue; + } + + if (c != ',') { /* Not an image separator? */ + WARNMS1(cinfo, JWRN_GIF_CHAR, c); + continue; + } + + /* Read and decipher Local Image Descriptor */ + if (! 
ReadOK(source->pub.input_file, hdrbuf, 9)) + ERREXIT(cinfo, JERR_INPUT_EOF); + /* we ignore top/left position info, also sort flag */ + width = LM_to_uint(hdrbuf[4],hdrbuf[5]); + height = LM_to_uint(hdrbuf[6],hdrbuf[7]); + source->is_interlaced = BitSet(hdrbuf[8], INTERLACE); + + /* Read local colormap if header indicates it is present */ + /* Note: if we wanted to support skipping images, */ + /* we'd need to skip rather than read colormap for ignored images */ + if (BitSet(hdrbuf[8], COLORMAPFLAG)) { + colormaplen = 2 << (hdrbuf[8] & 0x07); + ReadColorMap(source, colormaplen, source->colormap); + } + + source->input_code_size = ReadByte(source); /* get min-code-size byte; must be 2..8 because raw pixel codes (values below 1 << input_code_size) are used directly as indexes into the MAXCOLORMAPSIZE-entry colormap rows */ + if (source->input_code_size < 2 || source->input_code_size > 8) + ERREXIT1(cinfo, JERR_GIF_CODESIZE, source->input_code_size); + + /* Reached desired image, so break out of loop */ + /* If we wanted to skip this image, */ + /* we'd call SkipDataBlocks and then continue the loop */ + break; + } + + /* Prepare to read selected image: first initialize LZW decompressor */ + source->symbol_head = (UINT16 FAR *) + (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + LZW_TABLE_SIZE * SIZEOF(UINT16)); + source->symbol_tail = (UINT8 FAR *) + (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + LZW_TABLE_SIZE * SIZEOF(UINT8)); + source->symbol_stack = (UINT8 FAR *) + (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + LZW_TABLE_SIZE * SIZEOF(UINT8)); + InitLZWCode(source); + + /* + * If image is interlaced, we read it into a full-size sample array, + * decompressing as we go; then get_interlaced_row selects rows from the + * sample array in the proper order. + */ + if (source->is_interlaced) { + /* We request the virtual array now, but can't access it until virtual + * arrays have been allocated. Hence, the actual work of reading the + * image is postponed until the first call to get_pixel_rows.
+ */ + source->interlaced_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) width, (JDIMENSION) height, (JDIMENSION) 1); + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + source->pub.get_pixel_rows = load_interlaced_image; + } else { + source->pub.get_pixel_rows = get_pixel_rows; + } + + /* Create compressor input buffer. */ + source->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, + (JDIMENSION) width * NUMCOLORS, (JDIMENSION) 1); + source->pub.buffer_height = 1; + + /* Return info about the image. */ + cinfo->in_color_space = JCS_RGB; + cinfo->input_components = NUMCOLORS; + cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */ + cinfo->image_width = width; + cinfo->image_height = height; + + TRACEMS3(cinfo, 1, JTRC_GIF, width, height, colormaplen); +} + + +/* + * Read one row of pixels. + * This version is used for noninterlaced GIF images: + * we read directly from the GIF file. + */ + +METHODDEF(JDIMENSION) +get_pixel_rows (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + gif_source_ptr source = (gif_source_ptr) sinfo; + register int c; + register JSAMPROW ptr; + register JDIMENSION col; + register JSAMPARRAY colormap = source->colormap; + + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + c = LZWReadByte(source); + *ptr++ = colormap[CM_RED][c]; + *ptr++ = colormap[CM_GREEN][c]; + *ptr++ = colormap[CM_BLUE][c]; + } + return 1; +} + + +/* + * Read one row of pixels. + * This version is used for the first call on get_pixel_rows when + * reading an interlaced GIF file: we read the whole image into memory. 
+ */ + +METHODDEF(JDIMENSION) +load_interlaced_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + gif_source_ptr source = (gif_source_ptr) sinfo; + JSAMPARRAY image_ptr; + register JSAMPROW sptr; + register JDIMENSION col; + JDIMENSION row; + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + + /* Read the interlaced image into the virtual array we've created. */ + for (row = 0; row < cinfo->image_height; row++) { + if (progress != NULL) { + progress->pub.pass_counter = (long) row; + progress->pub.pass_limit = (long) cinfo->image_height; + (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + } + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->interlaced_image, + row, (JDIMENSION) 1, TRUE); + sptr = image_ptr[0]; + for (col = cinfo->image_width; col > 0; col--) { + *sptr++ = (JSAMPLE) LZWReadByte(source); + } + } + if (progress != NULL) + progress->completed_extra_passes++; + + /* Replace method pointer so subsequent calls don't come here. */ + source->pub.get_pixel_rows = get_interlaced_row; + /* Initialize for get_interlaced_row, and perform first call on it. */ + source->cur_row_number = 0; + source->pass2_offset = (cinfo->image_height + 7) / 8; + source->pass3_offset = source->pass2_offset + (cinfo->image_height + 3) / 8; + source->pass4_offset = source->pass3_offset + (cinfo->image_height + 1) / 4; + + return get_interlaced_row(cinfo, sinfo); +} + + +/* + * Read one row of pixels. + * This version is used for interlaced GIF images: + * we read from the virtual array. + */ + +METHODDEF(JDIMENSION) +get_interlaced_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + gif_source_ptr source = (gif_source_ptr) sinfo; + JSAMPARRAY image_ptr; + register int c; + register JSAMPROW sptr, ptr; + register JDIMENSION col; + register JSAMPARRAY colormap = source->colormap; + JDIMENSION irow; + + /* Figure out which row of interlaced image is needed, and access it. 
*/ + switch ((int) (source->cur_row_number & 7)) { + case 0: /* first-pass row */ + irow = source->cur_row_number >> 3; + break; + case 4: /* second-pass row */ + irow = (source->cur_row_number >> 3) + source->pass2_offset; + break; + case 2: /* third-pass row */ + case 6: + irow = (source->cur_row_number >> 2) + source->pass3_offset; + break; + default: /* fourth-pass row */ + irow = (source->cur_row_number >> 1) + source->pass4_offset; + break; + } + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, source->interlaced_image, + irow, (JDIMENSION) 1, FALSE); + /* Scan the row, expand colormap, and output */ + sptr = image_ptr[0]; + ptr = source->pub.buffer[0]; + for (col = cinfo->image_width; col > 0; col--) { + c = GETJSAMPLE(*sptr++); + *ptr++ = colormap[CM_RED][c]; + *ptr++ = colormap[CM_GREEN][c]; + *ptr++ = colormap[CM_BLUE][c]; + } + source->cur_row_number++; /* for next time */ + return 1; +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_input_gif (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +{ + /* no work */ +} + + /* * The module selection routine for GIF format input. */ @@ -30,9 +666,18 @@ GLOBAL(cjpeg_source_ptr) jinit_read_gif (j_compress_ptr cinfo) { - fprintf(stderr, "GIF input is unsupported for legal reasons. 
Sorry.\n"); - exit(EXIT_FAILURE); - return NULL; /* keep compiler happy */ + gif_source_ptr source; + + /* Create module interface object */ + source = (gif_source_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(gif_source_struct)); + source->cinfo = cinfo; /* make back link for subroutines */ + /* Fill in method ptrs, except get_pixel_rows which start_input sets */ + source->pub.start_input = start_input_gif; + source->pub.finish_input = finish_input_gif; + + return (cjpeg_source_ptr) source; } #endif /* GIF_SUPPORTED */ diff --git a/simd_README.ja.txt b/simd_README.ja.txt new file mode 100644 index 0000000..bc10b63 --- /dev/null +++ b/simd_README.ja.txt @@ -0,0 +1,145 @@ +Independent JPEG Group's JPEG software release 6b + with x86 SIMD extension for IJG JPEG library version 1.02 + == README == +----------------------------------------------------------- + + ** Note ** +The accompanying documents related to x86 SIMD extension are written in +Japanese. The English version of these documents is currently unavailable. +I apologize for this inconvenience to international programmers. + +Most of the source code of the extension part is written in assembly +language. To compile the source, you need NASM (netwide assembler). +NASM is available from http://nasm.sourceforge.net/ or +http://sourceforge.net/project/showfiles.php?group_id=6208 . + +At present, the x86 SIMD extension doesn't support 64-bit mode of +AMD64 (x86_64). + +The x86 SIMD extension is an unofficial extension to the IJG JPEG +software. Please do not send any questions about this distribution +to the Independent JPEG Group. + +For conditions of distribution and use, see the IJG's README file. +The same conditions apply to this SIMD-extended JPEG software. 
+ + + +¢£¤³¤Î¥½¥Õ¥È¤Ï + + JPEG ¤Î¥µ¥Ý¡¼¥È¥é¥¤¥Ö¥é¥ê¤È¤·¤Æ¹­¤¯»È¤ï¤ì¤Æ¤¤¤ë Independent JPEG Group's + JPEG library (libjpeg ¥é¥¤¥Ö¥é¥ê) ¤Ë¡¢Intel x86 ·Ï CPU ¤Î»ý¤Ä SIMD Ì¿Îá¤ò + ÍøÍѤ·¤¿¥³¡¼¥É(¥ë¡¼¥Á¥ó)¤ò¿·¤¿¤ËÄɲä·¡¢¹â®²½²þ¤¤·¤¿¤â¤Î¤Ç¤¹¡£ + MMX ¤ä SSE ¤Ê¤É¤Î SIMD ±é»»µ¡Ç½¤òÁõÈ÷¤·¤Æ¤¤¤ë¥×¥í¥»¥Ã¥µ¾å¤Çưºî¤µ¤»¤ë¤È¡¢ + ¥ª¥ê¥¸¥Ê¥ëÈǤΠlibjpeg ¥é¥¤¥Ö¥é¥ê¤ÈÈæ³Ó¤·¤Æ 2¡Á3 ÇÜÄøÅ٤ήÅÙ¤ÇÆ°ºî¤·¤Þ¤¹¡£ + ¤Þ¤¿¡¢SIMD ²½¤Ë°Í¤é¤Ê¤¤¹â®²½²þ¤¤â¤¤¤¯¤Ä¤«»Ü¤µ¤ì¤Æ¤ª¤ê¡¢SIMD ±é»»¤Î»È¤¨ + ¤Ê¤¤µì·¿CPU¤Ë¤ª¤¤¤Æ¤â¡¢¥ª¥ê¥¸¥Ê¥ëÈǤÈÈæ³Ó¤·¤Æ½½¿ô¡óÄøÅٹ⮤Ëưºî¤·¤Þ¤¹¡£ + + JPEG °µ½Ì¡¿Å¸³«½èÍý¤Î¹â®²½¤òÌÜŪ¤È¤·¤Æ¤¤¤Þ¤¹¤¬¡¢Æ°ºî®ÅÙºÇÍ¥Àè¤Ç¤Ï¤Ê¤¯¡¢ + ¥ª¥ê¥¸¥Ê¥ëÈÇ¤ÈÆ±Åù°Ê¾å¤Î·×»»ÀºÅÙ¤ò»ý¤Ä¤³¤È¤òºÇÍ¥Àè¤Ë¹Í¤¨¤¿¥³¡¼¥É¤òºÎÍÑ + ¤·¤Æ¤¤¤Þ¤¹¡£¼ÂºÝ¡¢DCT±é»»¤ËÉâÆ°¾®¿ôÅÀDCT¤ò»È¤Ã¤¿¾ì¹ç¡¢¤ª¤è¤Ó¡¢¤ä¤äÆÃ¼ì¤Ê + ¥µ¥ó¥×¥ê¥ó¥°Èæ(h1v2)¤ò»ý¤ÄJPEG¥Õ¥¡¥¤¥ë¤òŸ³«¤¹¤ë¾ì¹ç¤ò½ü¤¤¤Æ¤Ï¡¢ + ¥ª¥ê¥¸¥Ê¥ëÈǤȣ±¥Ó¥Ã¥È¤â°ã¤ï¤Ê¤¤·ë²Ì¤ò½Ð¤·¤Þ¤¹¡£¾åµ­¤Î£²¤Ä¤ÎÎã³°¤Î¾ì¹ç¤â + ¥ª¥ê¥¸¥Ê¥ëÈǤè¤ê¤Ï¹â²è¼Á²½(¹âÀºÅÙ²½)¤µ¤ì¤Æ¤¤¤Þ¤¹¡£ + + SIMD Âбþ²½¤ËºÝ¤·¤Æ¤Ï¡¢²Äǽ¤Ê¸Â¤ê¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠlibjpeg ¥é¥¤¥Ö¥é¥ê¤È¤Î + ¸ß´¹À­¤¬¼º¤ï¤ì¤Ê¤¤¤è¤¦¤Ë¹Í褵¤ì¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢¤Û¤È¤ó¤É¤Î¾ì¹ç¡¢¥ª¥ê¥¸¥Ê¥ë + ÈǤò¤½¤Î¤Þ¤ÞÃÖ¤­´¹¤¨¤ë¤³¤È¤¬²Äǽ¤Ç¤¹¡£ÆÃ¤Ë¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤Ë´Ø¤·¤Æ¸À¤¨¤Ð¡¢ + °ìÉô¤ÎÎã³°(cygwin ¤Î¾ì¹ç)¤ò½ü¤­¡¢¤½¤ì¤Ï¥ª¥ê¥¸¥Ê¥ëÈǤȥХ¤¥Ê¥ê¥ì¥Ù¥ë¤Ç¤Î + ¾å°Ì¸ß´¹À­¤¬¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢¤½¤Î¤Þ¤Þ¥ª¥ê¥¸¥Ê¥ëÈǤòÃÖ¤­´¹¤¨¤ë¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£ + + SIMD Âбþ²½¤µ¤ì¤Æ¤¤¤ëÉôʬ¤Ï¡¢°Ê²¼¤Î¤È¤ª¤ê¡§ + + °µ½Ì½èÍý¡§ + ¿§¶õ´ÖÊÑ´¹(RGB->YCbCr) : MMX or SSE2 + ¥À¥¦¥ó¥µ¥ó¥×¥ê¥ó¥° : MMX or SSE2 + DCT½çÊÑ´¹(¹âÀºÅÙÀ°¿ô) : MMX or SSE2 + DCT½çÊÑ´¹(¹â®À°¿ô) : MMX or SSE2 + DCT½çÊÑ´¹(ÉâÆ°¾®¿ô) : 3DNow! or SSE (À°¿ô±é»»Éô: MMX or SSE2) + DCT·¸¿ôÎ̻Ҳ½(À°¿ô) : MMX or SSE2 + DCT·¸¿ôÎ̻Ҳ½(ÉâÆ°¾®¿ô) : 3DNow! or SSE (À°¿ô±é»»Éô: MMX or SSE2) + + Ÿ³«½èÍý¡§ + ¿§¶õ´ÖÊÑ´¹(YCbCr->RGB) : MMX or SSE2 + ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥° : MMX or SSE2 + DCTµÕÊÑ´¹(¹âÀºÅÙÀ°¿ô) : MMX or SSE2 + DCTµÕÊÑ´¹(¹â®À°¿ô) : MMX or SSE2 + DCTµÕÊÑ´¹(ÉâÆ°¾®¿ô) : 3DNow! 
or SSE (À°¿ô±é»»Éô: MMX or SSE2) + DCTµÕÊÑ´¹(½Ì¾®Å¸³«) : MMX or SSE2 + + Ãí¡ËSSE2 ¤Ë¤Ä¤¤¤Æ¤Ï¡¢SIMD À°¿ô±é»»¤Î¤ß¤òÍøÍѤ·¤Æ¤¤¤Þ¤¹¡£SIMD ÇÜÀºÅÙ + ÉâÆ°¾®¿ôÅÀ±é»»¤ÏÍøÍѤ·¤Æ¤¤¤Þ¤»¤ó¡£¤Þ¤¿¡¢SSE3 ¤Ï»ÈÍѤµ¤ì¤Æ¤¤¤Þ¤»¤ó¡£ + ¤³¤Î JPEG ¥é¥¤¥Ö¥é¥ê¤Ë¤ª¤¤¤Æ¤Ï¡¢SSE3 ¤ò»ÈÍѤ·¤Æ¤âưºî®ÅÙ¸þ¾å¤Î + ¸«¹þ¤ß¤¬¤Û¤È¤ó¤É¤Ê¤¤¤¿¤á¤Ç¡¢SSE3 ¤ò¥µ¥Ý¡¼¥È¤¹¤ëͽÄê¤Ï¤¢¤ê¤Þ¤»¤ó¡£ + + ¤³¤Î¤Û¤«¤Ë¡¢¥¢¥»¥ó¥Ö¥ê¸À¸ìÈÇDCT¥ë¡¼¥Á¥ó(ÈóSIMD; ½çÊÑ´¹£³¼ï¡¿µÕÊÑ´¹£´¼ï) + ¤Ë¤è¤ê¡¢SIMDÌ¿Îá¤Î»È¤¨¤Ê¤¤µì·¿CPU¤Ç¤â½½¿ô¡óÄøÅ٤ι⮲½¤¬´üÂԤǤ­¤Þ¤¹¡£ + ¤µ¤é¤Ë¡¢Å¸³«½èÍý¤Ç¤Î¥Ï¥Õ¥Þ¥ó¥Ç¥³¡¼¥É¥ë¡¼¥Á¥ó¤Ï¡¢SIMD ²½¤Ë°Í¤é¤Ê¤¤ÊýË¡¤Ç + ¹â®²½²þ¤¤µ¤ì¤Æ¤¤¤Þ¤¹¡£ + + +¢£Âбþ¤·¤Æ¤¤¤ë¥×¥é¥Ã¥È¥Õ¥©¡¼¥à + + Intel x86 CPU ¤Ë¸ÇÍ­¤Îµ¡Ç½¤òÍøÍѤ·¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢¥ª¥ê¥¸¥Ê¥ëÈǤȤϰۤʤꡢ + Intel x86 CPU ¤ª¤è¤Ó¤½¤Î¸ß´¹ CPU ¤òºÎÍѤ·¤Æ¤¤¤ë¥·¥¹¥Æ¥à¤Ë¸Â¤é¤ì¤Þ¤¹¡£ + PowerPC ¤Ê¤É¤Î Intel x86 ·Ï°Ê³°¤Î¥·¥¹¥Æ¥à¤Ë¤ÏÂбþ¤·¤Æ¤¤¤Þ¤»¤ó¡£ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ¶ñÂÎŪ¤Ë¤Ï¡¢80386 °Ê¹ß¤Î Intel x86 CPU ¤ª¤è¤Ó¤½¤Î¸ß´¹ CPU ¤òºÎÍѤ·¤Æ¤¤¤ë + ¥Ï¡¼¥É¥¦¥§¥¢¤Ç¡¢¤«¤Ä¡¢32bit¥Õ¥é¥Ã¥È¥¢¥É¥ì¥¹¥â¡¼¥É(Êݸî¥â¡¼¥É)¤ò»ÈÍѤ·¤Æ + ¤¤¤ë¥×¥é¥Ã¥È¥Õ¥©¡¼¥à(OS)¤¬ÂоݤǤ¹¡£¤³¤ì¤Ë¤Ï¡¢Win32 (Windows 9x·Ï/NT·Ï) + ¤ä³Æ¼ï PC-UNIX (linux ¤ä xBSD ¥Õ¥¡¥ß¥ê¤Ê¤É) ¤Ê¤É¤¬³ºÅö¤·¤Þ¤¹¡£¤Ê¤ª¡¢ + AMD64 (EM64T) ¤Î64bit¥â¡¼¥É´Ä¶­¤Ë¤ÏÂбþ¤·¤Æ¤¤¤Þ¤»¤ó¡£¤´Ãí°Õ¤¯¤À¤µ¤¤¡£ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +¢£¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG library ¸ÇÍ­¤ÎÀ©¸Â + + ¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG library ¤Ç¤Ï¡¢¥³¥ó¥Ñ¥¤¥ë»þ¤Î¥ª¥×¥·¥ç¥ó¤Ç¡¢ + 8bitÀºÅÙJPEG ¤È 12bitÀºÅÙJPEG ¤ÎξÊý¤ËÂбþ¤·¤Þ¤¹¤¬¡¢¤³¤Î SIMD ³ÈÄ¥ÈÇ¤Ï + 8bitÀºÅÙJPEG ¤Î¤ß¤ÎÂбþ¤Ç¡¢12bitÀºÅÙJPEG ¤Ë¤ÏÂбþ¤·¤Þ¤»¤ó¡£¤È¤Ï¤¤¤¨¡¢ + 12bitÀºÅÙJPEG ¤Ï°åÎÅÍÑ¤Ê¤É¤ÎÆÃ¼ìʬÌî¤ò½ü¤¤¤ÆËؤɻȤï¤ì¤Æ¤¤¤Ê¤¤¤Î¤Ç¡¢ + ÌäÂê¤Ï¾¯¤Ê¤¤¤È»×¤¤¤Þ¤¹¡£ + + +¢£»È¤¤Êý + + ¥Þ¥Ë¥å¥¢¥ë¤Ï¡¢°Ê²¼¤Î¥Õ¥¡¥¤¥ë¤Ëʬ¤«¤ì¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢¼ÂºÝ¤Î»È¤¤Êý¤Ê¤É¤Ë + ¤Ä¤¤¤Æ¤Ï¡¢¤½¤Á¤é¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£ + + simd_README.ja.txt - ¤³¤Î¥Õ¥¡¥¤¥ë + simd_filelist.ja.txt - ¼ýÏ¿¥Õ¥¡¥¤¥ë¤Î¥Õ¥¡¥¤¥ë¥ê¥¹¥È + simd_install.ja.txt - ¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý + simd_internal.ja.txt - SIMD ³ÈÄ¥Éôʬ¤Î¾ÜºÙ + simd_cdjpeg.ja.txt - SIMD 
ÈÇ cjpeg/djpeg ¤Ë¸ÇÍ­¤Îµ¡Ç½¤Î²òÀâ + simd_changes.ja.txt - SIMD ³ÈÄ¥Éôʬ¤Î²þÈÇÍúÎò + + +¢£»ÈÍѾò·ï¡¦¥µ¥Ý¡¼¥È + + ¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG software ¤Î»ÈÍѾò·ï¤Ë¤Ä¤¤¤Æ¤Ï¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠ+ IJG JPEG software ¤Î»ÈÍѾò·ï¤¬Å¬ÍѤµ¤ì¤Þ¤¹¡£¾Ü¤·¤¯¤Ï¡¢Æ±º­¤Î README + ¥Õ¥¡¥¤¥ë(±Ñʸ)¤Î LEGAL ISSUES ¤Î¹à¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£ + + ¾åµ­¤Î»ÈÍѾò·ï¤ÎÆâÍÆ¤Î·«¤êÊÖ¤·¤Ë¤Ê¤ê¤Þ¤¹¤¬¡¢¤³¤Î¥½¥Õ¥È¥¦¥§¥¢¤Ï¡Ö¸½¾õ¤Î + ¤Þ¤Þ¤Ç¡×Ä󶡤µ¤ì¤Æ¤¤¤ë¤â¤Î¤Ç¡¢¾¦¶ÈŪ¤Ê»ÈÍѲÄǽÀ­¡¢¤ª¤è¤ÓÆÃÄê¤ÎÌÜŪ¤Ë + ÂФ¹¤ëŬ¹çÀ­¤Ê¤É¤â´Þ¤á¡¢¤¤¤«¤Ê¤ëÊݾڤ⤢¤ê¤Þ¤»¤ó¡£ + ¤Þ¤¿¡¢¸¶ºî¼Ô(The Independent JPEG Group)¤â²þ¤¼Ô(MIYASAKA Masaru)¤â¡¢ + »öͳ¤Î¤¤¤«¤ó¤òÌä¤ï¤º¡¢ËÜ¥½¥Õ¥È¥¦¥§¥¢¤Î»ÈÍѤˤè¤Ã¤ÆÈ¯À¸¤·¤¿Ç¡²¿¤Ê¤ë»³²¤Ë + ¤Ä¤¤¤Æ¤â¡¢°ìÀÚ¤½¤ÎÀÕǤ¤òÉé¤ï¤Ê¤¤¤â¤Î¤È¤·¤Þ¤¹¡£ + + ¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG software ¤Ï¡¢¥ª¥ê¥¸¥Ê¥ë³«È¯¸µ¤Î IJG ¤È¤Ï´Ø·¸ + ¤Ê¤¯¡¢ÆÈ¼«¤Ë³ÈÄ¥¤ò¹Ô¤Ê¤Ã¤¿¤â¤Î¤Ç¤¹¡£¤Ç¤¹¤Î¤Ç¡¢¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG + software ¤Ë´Ø¤¹¤ë¼ÁÌä¤ò¡¢¥ª¥ê¥¸¥Ê¥ë³«È¯¸µ (The Independent JPEG Group) + ¤ËÁ÷¤é¤Ê¤¤¤Ç¤¯¤À¤µ¤¤¡£ + + ¤³¤Î SIMD ³ÈÄ¥ÈÇ IJG JPEG software ¤Ë´Ø¤·¤Æ¤Ï¡¢¸¶Â§¤È¤·¤Æ¥Î¡¼¥µ¥Ý¡¼¥È¤È + ¤µ¤»¤Æ¤¤¤¿¤À¤­¤Þ¤¹¡£¥á¡¼¥ë¤Ê¤É¤Ç¤´¼ÁÌä¤Ê¤É¤ò¤¤¤¿¤À¤­¤Þ¤·¤Æ¤â¡¢¾ï¤Ë²¿¤é¤« + ¤ÎÊÖÅú¤¬¤Ç¤­¤ë¤ï¤±¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¤Î¤Ç¡¢¤´¾µÃΤª¤­¤¯¤À¤µ¤¤¡£ + ÆÃ¤Ë¡¢¡Ê¥ª¥ê¥¸¥Ê¥ë¤Î±Ñʸ¥Þ¥Ë¥å¥¢¥ë¤ò´Þ¤á¡ËƱº­¤Î¥Þ¥Ë¥å¥¢¥ëÎà¤Ë²óÅú¤¬ + ½ñ¤¤¤Æ¤¢¤ë¼ÁÌä¤ä¡¢»ÈÍѼԤΥ½¥Õ¥È¥¦¥§¥¢µ»½Ñ¼Ô¤È¤·¤Æ¤Îµ»ÎÌÉÔ­¡¦·Ð¸³ÉÔ­¤Ë + ´Ø¤ï¤ë¼ÁÌä¡¢¼ÁÌä¤ÎÍ×ÎΤòÆÀ¤Ê¤¤¼ÁÌä¤Ê¤É¤Ë¤Ä¤¤¤Æ¤Ï¡¢²óÅú¤ò¤¤¤¿¤·¤Þ¤»¤ó¤Î¤Ç¡¢ + ¤¢¤·¤«¤é¤º¤´Î»¾µ¤¯¤À¤µ¤¤¡£ + + + + E-Mail Address : alkaid@coral.ocn.ne.jp (µÜºä ¸­/MIYASAKA Masaru) +[EOF] diff --git a/simd_cdjpeg.ja.txt b/simd_cdjpeg.ja.txt new file mode 100644 index 0000000..941a15a --- /dev/null +++ b/simd_cdjpeg.ja.txt @@ -0,0 +1,75 @@ +Independent JPEG Group's JPEG software release 6b + with x86 SIMD extension for IJG JPEG library version 1.02 + == CDJPEG == +----------------------------------------------------------- + +¢£¤³¤Î¥Õ¥¡¥¤¥ë¤Ï + + ¤³¤Î¥Õ¥¡¥¤¥ë¤Ç¤Ï¡¢SIMD ÈǤΠcjpeg / djpeg ¤Ë¸ÇÍ­¤Îµ¡Ç½¤ò²òÀ⤷¤Þ¤¹¡£ + + ¢£ SIMD ưºî¥â¡¼¥É¾ðÊó (-v ¥ª¥×¥·¥ç¥ó) + ¢£ ÆÃÄê¤Î SIMD ±é»»¤ò»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë¤¹¤ë (-noXXX ¥ª¥×¥·¥ç¥ó) + ¢£ 
GIF ·Á¼°¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤· (djpeg ¤Î -gif ¥ª¥×¥·¥ç¥ó) + + +¢£ SIMD ưºî¥â¡¼¥É¾ðÊó (-v ¥ª¥×¥·¥ç¥ó) + + ¤³¤Î SIMD ÈÇ cjpeg / djpeg ¤Ç¤Ï¡¢-v ¥ª¥×¥·¥ç¥ó¤ò¤Ä¤±¤Æµ¯Æ°¤¹¤ë¤È¡¢°Ê²¼ + ¤Î¤è¤¦¤Ê SIMD ưºî¥â¡¼¥É¾ðÊ󤬥С¼¥¸¥ç¥ó¾ðÊó¤È¶¦¤Ëɽ¼¨¤µ¤ì¤Þ¤¹¡£ + + Independent JPEG Group's DJPEG, version 6b 27-Mar-1998 + Copyright (C) 1998, Thomas G. Lane + + x86 SIMD extension for IJG JPEG library, version 1.02 + + SIMD instructions supported by the system : MMX 3DNow! SSE SSE2 + + === SIMD Operation Modes === + Accurate integer DCT (-dct int) : SSE2 + Fast integer DCT (-dct fast) : SSE2 + Floating-point DCT (-dct float) : SSE + Reduced-size DCT (-scale M/N) : SSE2 + High-quality upsampling (default) : SSE2 + Low-quality upsampling (-nosmooth) : SSE2 + Colorspace conversion (YCbCr->RGB) : SSE2 + + "SIMD instructions supported by the system" ¤Î¹àÌܤÇÎóµó¤µ¤ì¤ë¤Î¤Ï¡¢ + ¥·¥¹¥Æ¥à(CPU/OS)¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤Æ¤¤¤ë SIMD ±é»»¤Î¼ïÎà¤Ç¤¹¡£¤Ê¤ª¡¢¤³¤Î + ¥½¥Õ¥È¤Ç¤Ï SSE3 ¤Ï»ÈÍѤµ¤ì¤Æ¤¤¤Þ¤»¤ó¤· SSE3 ¤Î¥µ¥Ý¡¼¥È¤Î¸¡½Ð¤â¹Ô¤Ê¤ï¤ì + ¤Þ¤»¤ó¤Î¤Ç¡¢SSE3 ¤¬¥µ¥Ý¡¼¥È¤µ¤ì¤Æ¤¤¤Æ¤â¤³¤Î¹àÌܤˤϸ½¤ì¤Þ¤»¤ó¡£ + + ¤½¤Î²¼¤Î "SIMD Operation Modes" ¤Ï¡¢³Æ½èÍýÃʳ¬¤Ç»ÈÍѤµ¤ì¤ë SIMD ±é»»¤Î + ¼ïÎà¤Ç¤¹¡£Floating-point DCT ¤Ç¤Ï 3DNow! ¤« SSE ¡¢¤½¤ì°Ê³°¤Î¤È¤³¤í¤Ç¤Ï + MMX ¤« SSE2 ¤¬ÁªÂò¤µ¤ì¤Þ¤¹¡£°ìÈ̤ˡ¢SSE/SSE2 ¤ÎÊý¤¬ MMX/3DNow! ¤è¤ê¤â + ¹âÀ­Ç½¤È¤µ¤ì¤ë¤Î¤Ç¡¢¤³¤ÎÁÐÊý¤¬ÍøÍѲÄǽ¤Ê¾ì¹ç¤Ï SSE/SSE2 ¤¬Í¥ÀèŪ¤ËÁªÂò + ¤µ¤ì¤Þ¤¹¡£ + + +¢£ ÆÃÄê¤Î SIMD ±é»»¤ò»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë¤¹¤ë (-noXXX ¥ª¥×¥·¥ç¥ó) + + ¤³¤Î SIMD ÈÇ cjpeg / djpeg ¤Ç¤Ï¡¢°Ê²¼¤Î¤è¤¦¤Ê¥ª¥×¥·¥ç¥ó¤ò»ØÄꤹ¤ë¤³¤È¤Ç¡¢ + ÆÃÄê¤Î SIMD ±é»»¤ò»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë¤Ç¤­¤Þ¤¹¡£ + + -nommx MMX ¤ò»ÈÍѤ·¤Ê¤¤ + -no3dnow 3DNow! 
¤ò»ÈÍѤ·¤Ê¤¤ + -nosse SSE ¤ò»ÈÍѤ·¤Ê¤¤ + -nosse2 SSE2 ¤ò»ÈÍѤ·¤Ê¤¤ + -nosimd ¤¹¤Ù¤Æ¤Î SIMD ±é»»¤ò»ÈÍѤ·¤Ê¤¤ + + ¤³¤ì¤é¤Î¥ª¥×¥·¥ç¥ó¤Ï¡¢cjpeg/djpeg ¤Î¥³¥Þ¥ó¥É¥é¥¤¥ó¤ÎÀèÆ¬¤Ë»ØÄꤹ¤ë¤è¤¦ + ¤Ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + + +¢£ GIF ·Á¼°¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤· (djpeg ¤Î -gif ¥ª¥×¥·¥ç¥ó) + + ¥ª¥ê¥¸¥Ê¥ëÈÇ cjpeg/djpeg ¤Î version 6b ¤Ç¤Ï¡¢ÆÃµö¾å¤ÎÌäÂ꤫¤é¡¢GIF ·Á¼° + ²èÁü¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤·¤¬¥µ¥Ý¡¼¥È¤µ¤ì¤Ê¤¯¤Ê¤Ã¤Æ¤¤¤Þ¤·¤¿¡£¤Ç¤¹¤¬¡¢ + GIF ¤Ë´Ø¤¹¤ëÆÃµö¤¬ 2003¡Á2004 ǯ¤Ë¤«¤±¤ÆÀ¤³¦Åª¤Ë´ü¸ÂÀÚ¤ì¤Ë¤Ê¤Ã¤¿¤¿¤á¡¢ + Åö SIMD ÈÇ¤Ç¤Ï GIF ·Á¼°¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤·¤òÉü³è¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£ + + ;Ã̤Ǥ¹¤¬¡¢Åö SIMD ÈÇ¤Ç GIF ·Á¼°¤ÎÆÉ¤ß¹þ¤ß¡¿½ñ¤­½Ð¤·¤Ë»ÈÍѤ·¤Æ¤¤¤ë + ¥â¥¸¥å¡¼¥ë¤Ï¡¢µìÈǤǤ¢¤ë version 6a ¤Î¤â¤Î¤òήÍѤ·¤Æ¤¤¤Þ¤¹¡£ + + + +[EOF] diff --git a/simd_changes.ja.txt b/simd_changes.ja.txt new file mode 100644 index 0000000..b256002 --- /dev/null +++ b/simd_changes.ja.txt @@ -0,0 +1,24 @@ +Independent JPEG Group's JPEG software release 6b + with x86 SIMD extension for IJG JPEG library version 1.02 + == CHANGES == +----------------------------------------------------------- + +IJG R6b with x86SIMD V1.02 (2006-02-04) +--------------------------------------- +* x86 ÈÇ Darwin ¤ËÂбþ¤·¤Þ¤·¤¿¡£Æ°ºî³Îǧ¤Ï Darwin 8.0.1 for x86 ¤Ë¤Æ + ¹Ô¤Ê¤¤¤Þ¤·¤¿¡£x86 ÈÇ Mac OS X ¤Ç¤â¡¢Æ°ºî¤¹¤ë¤â¤Î¤È»×¤ï¤ì¤Þ¤¹¡£ + ¤Þ¤¿¡¢Solaris 10 ¤Ç¤Îưºî³Îǧ¤â¹Ô¤Ê¤¤¤Þ¤·¤¿¡£ + +IJG R6b with x86SIMD V1.01 (2006-01-26) +--------------------------------------- +* jsimdgcc.c ¤ò»È¤Ã¤¿¤È¤­¡¢NEED_SHORT_EXTERNAL_NAMES ¤¬ÄêµÁ¤µ¤ì¤Æ¤¤¤ë¤È + Àµ¾ï¤Ë¥ê¥ó¥¯¤Ç¤­¤Ê¤¤¥Ð¥°¤ò½¤Àµ¤·¤Þ¤·¤¿¡£ +* °µ½Ì¦¤Î¥³¡¼¥É¤Î°ìÉô(jcsammmx.asm, jcsamss2.asm, jcqnt3dn.asm)¤Ë¤ä¤ä + ¾éĹ¤Ê²Õ½ê¤¬¤¢¤Ã¤¿¤Î¤Ç¡¢¤³¤ì¤ò½¤Àµ¤·¤Þ¤·¤¿¡£ + +IJG R6b with x86SIMD V1.0 (2006-01-10) +-------------------------------------- +* ºÇ½é¤Î¸ø³«ÈÇ¡£ + + +[EOF] diff --git a/simd_filelist.ja.txt b/simd_filelist.ja.txt new file mode 100644 index 0000000..4bee431 --- /dev/null +++ b/simd_filelist.ja.txt @@ -0,0 +1,261 @@ +Independent JPEG Group's JPEG software release 6b + with x86 SIMD extension for IJG JPEG library version 1.02 + == 
FILELIST == +----------------------------------------------------------- + +¢£¤³¤Î¥Õ¥¡¥¤¥ë¤Ï + + ¤³¤Î¥Õ¥¡¥¤¥ë¤Ç¤Ï¡¢SIMD ÈÇ IJG JPEG software ¤ÎÇÛÉÛ¥¢¡¼¥«¥¤¥Ö¤Ë¼ý¤á¤é¤ì¤Æ + ¤¤¤ë³Æ¥Õ¥¡¥¤¥ë¤Î³µÍפò²òÀ⤷¤Þ¤¹¡£¤Ê¤ª¡¢¤³¤³¤Ç¤Ï x86 SIMD extension ¤Ç + ¿·¤¿¤ËÄɲ䵤줿¥Õ¥¡¥¤¥ë¤È¡¢x86 SIMD extension ¤Ç²þÊѤ¬²Ã¤¨¤é¤ì¤Æµ¡Ç½¡¿ + Ìò³ä¤¬¥ª¥ê¥¸¥Ê¥ëÈǤȤϰۤʤë¥Õ¥¡¥¤¥ë¤Î¤ß¤ò²òÀ⤷¤Þ¤¹¡£¤½¤ì°Ê³°¤Î¥Õ¥¡¥¤¥ë + ¤Ë¤Ä¤¤¤Æ¤Ï¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠfilelist.doc (±Ñʸ) ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£ + + ¢£¥Þ¥Ë¥å¥¢¥ëÎà + ¢£configure ¥¹¥¯¥ê¥×¥È¤Ë´Ø·¸¤¹¤ë¥Õ¥¡¥¤¥ë + ¢£Microsoft Visual C++ 6.0 ÍÑ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë·² (vc6proj/) + ¢£ÆÃÄê¤Î¥³¥ó¥Ñ¥¤¥éÍѤΠjconfig.h ¤È Makefile + ¢£¥½¡¼¥¹¥Õ¥¡¥¤¥ë + ¡ü x86 SIMD extension ¤Ç¿·¤¿¤ËÄɲ䵤줿¥Õ¥¡¥¤¥ë + ¡ü x86 SIMD extension ¤Ç²þÊѤ¬²Ã¤¨¤é¤ì¤¿¥Õ¥¡¥¤¥ë + ¢£Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg (altui/) + ¢£SIMD ÈǤǤϻȤï¤ì¤Ê¤¤¥Õ¥¡¥¤¥ë·² (unused/) + + +¢£¥Þ¥Ë¥å¥¢¥ëÎà + + °Ê²¼¤Î SIMD ÈǤΥޥ˥奢¥ë¤Î¾¤Ë¡¢¥ª¥ê¥¸¥Ê¥ëÈǤαÑʸ¥Þ¥Ë¥å¥¢¥ë¤â + ¤½¤Î¤Þ¤Þ¼ýÏ¿¤·¤Æ¤¢¤ê¤Þ¤¹¡£Ê»¤»¤Æ»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£ + + simd_README.ja.txt ¼ç¥Þ¥Ë¥å¥¢¥ë(x86 SIMD extension ¤Î³µÍפʤÉ) + simd_filelist.ja.txt SIMD ÈÇ IJG JPEG software ¤Î¥Õ¥¡¥¤¥ë¥ê¥¹¥È + simd_install.ja.txt SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý + simd_internal.ja.txt SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¡¢SIMD ³ÈÄ¥Éôʬ¤Î¾ÜºÙ + simd_cdjpeg.ja.txt SIMD ÈǤΠcjpeg / djpeg ¤Ë¸ÇÍ­¤Îµ¡Ç½¤Î²òÀâ + simd_changes.ja.txt SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î²þÈÇÍúÎò + + +¢£ configure ¥¹¥¯¥ê¥×¥È¤Ë´Ø·¸¤¹¤ë¥Õ¥¡¥¤¥ë + + UNIX ´Ä¶­¤Ç configure ¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ë¾ì¹ç¤ËɬÍפȤʤë¥Õ¥¡¥¤¥ë·² + ¤Ç¤¹¡£Èó UNIX ´Ä¶­¤Ç¤Ï¡¢ºï½ü¤·¤Æ¤â¤«¤Þ¤¤¤Þ¤»¤ó¡£ + + configure configure ¥¹¥¯¥ê¥×¥ÈËÜÂÎ + config.ver configure ¤«¤é¸Æ¤Ó½Ð¤µ¤ì¤ë¥¹¥¯¥ê¥×¥È¤Ç¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î + ¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤òÄêµÁ¤·¤Æ¤¤¤Þ¤¹ + ltmain.sh configure ¤Î¥µ¥Ý¡¼¥È¥¹¥¯¥ê¥×¥È (from GNU libtool) + config.guess ¡· + config.sub ¡· + install-sh install ¥³¥Þ¥ó¥É¤¬¤Ê¤¤¾ì¹ç¤ÎÂåÍÑ¥¹¥¯¥ê¥×¥È + nasm_lt.sh nasm ¤ò GNU libtool ¤Ç»È¤¦¾ì¹ç¤Î¥é¥Ã¥Ñ¡¦¥¹¥¯¥ê¥×¥È + jconfig.cfg configure ¤¬À¸À®¤¹¤ë jconfig.h ¤Î¿÷·¿¥Õ¥¡¥¤¥ë + makefile.cfg configure ¤¬À¸À®¤¹¤ë Makefile ¤Î¿÷·¿¥Õ¥¡¥¤¥ë + configure.in 
configure ¥¹¥¯¥ê¥×¥È¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë (for GNU autoconf) + aclocal.m4 ¡· + libjpeg.spec RPM ¤òºÎÍѤ·¤Æ¤¤¤ë linux ¥·¥¹¥Æ¥à¸þ¤±¤Î spec ¥Õ¥¡¥¤¥ë + + +¢£ Microsoft Visual C++ 6.0 ÍÑ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë·² (vc6proj/) + + ¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤ò»È¤¦¾ì¹ç¤Ï¡¢¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤¬¤¢¤ë°ì¤Ä¾å¤Î¥Õ¥©¥ë¥À¤Ë + ¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤ò¤¹¤Ù¤Æ°Üư¤·¤Æ¤¯¤À¤µ¤¤¡£¾Ü¤·¤¯¤Ï¡¢simd_install.ja.txt + ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£ + + Visual C++ 6.0 °Ê¹ß¤ÎÅý¹ç³«È¯´Ä¶­(DevStudio)¤Î¾ì¹ç¤Ï¡¢¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë + ¤òÊÑ´¹(¥¤¥ó¥Ý¡¼¥È)¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£ + + vc6proj/libjpeg.dsw ¥×¥í¥¸¥§¥¯¥È¡¦¥ï¡¼¥¯¥¹¥Ú¡¼¥¹ + vc6proj/makecfg.dsp libjpeg.dsp (libjpeg.lib) ¤Î¥Ó¥ë¥É¤ËɬÍ×¤Ê + ÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òºî¤ë + vc6proj/libjpeg.dsp libjpeg.lib ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë + vc6proj/cjpeg.dsp cjpeg.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë + vc6proj/djpeg.dsp djpeg.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë + vc6proj/jpegtran.dsp jpegtran.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë + vc6proj/rdjpgcom.dsp rdjpgcom.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë + vc6proj/wrjpgcom.dsp wrjpgcom.exe ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë + vc6proj/apptest.dsp cjpeg, djpeg, jpegtran ¤Îưºî¥Æ¥¹¥È(make test) + vc6proj/jconfig.h VC++ ÍѤΠjconfig.h (jconfig.vc ¤ÈƱ¤¸¤â¤Î) + + +¢£ÆÃÄê¤Î¥³¥ó¥Ñ¥¤¥éÍѤΠjconfig.h ¤È Makefile + + ¾Ü¤·¤¯¤Ï¡¢simd_install.ja.txt ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤Ë¤Ï¡¢ + SIMD ²½¤Ëȼ¤Ã¤Æ¿·¤¿¤ËÄɲ䵤줿¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Ë´Ø¤¹¤ëµ­½Ò¤¬Äɲäµ¤ì¤Æ + ¤¤¤Þ¤¹¤«¤é¡¢¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤Î jconfig.* ¤È Makefile.* ¤Ï¡¢ + ¤³¤Î SIMD ÈǤǤϻÈÍѤǤ­¤Þ¤»¤ó¡£ + + jconfig.bc5 Borland C++ Compiler 5.5 (win32) ÍѤΠjconfig.h + makefile.bc5 Borland C++ Compiler 5.5 (win32) ÍѤΠMakefile + jconfig.dj DJGPP v2.0 ÍѤΠjconfig.h + makefile.dj DJGPP v2.0 ÍѤΠMakefile + jconfig.mgw MinGW ÍѤΠjconfig.h + makefile.mgw MinGW ÍѤΠMakefile (ÀÅŪ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®) + makefile.mgwdll MinGW ÍѤΠMakefile (DLL ÈÇ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®) + jconfig.vc VC++ ÍѤΠjconfig.h + makefile.vc VC++ ÍѤΠMakefile (ÀÅŪ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®) + makefile.vcdll VC++ ÍѤΠMakefile (DLLÈÇ JPEG ¥é¥¤¥Ö¥é¥ê¤òºîÀ®) + jconfig.linux linux ÍѤΠjconfig.h (Ãí¡§configure ¤Î»ÈÍѤò¿ä¾©) + makefile.linux linux 
ÍѤΠMakefile (Ãí¡§configure ¤Î»ÈÍѤò¿ä¾©) + + °Ê²¼¤Î¥Õ¥¡¥¤¥ë¤Ï¡¢¾åµ­°Ê³°¤Î¥³¥ó¥Ñ¥¤¥é¤ËÂбþ¤¹¤ë jconfig.h ¤È Makefile + ¤ò¿·¤¿¤ËºîÀ®¤¹¤ë¾ì¹ç¤Î¿÷·¿¤È¤Ê¤ë¥Õ¥¡¥¤¥ë¤Ç¤¹¡£¥ª¥ê¥¸¥Ê¥ëÈǤˤ⸺ߤ·¤Þ¤¹ + ¤¬¡¢SIMD ²½¤Ëȼ¤¦Êѹ¹¤¬»Ü¤µ¤ì¤Æ¤¤¤Þ¤¹¡£ + + ckconfig.c jconfig.h ¤òÀ¸À®¤¹¤ë¥×¥í¥°¥é¥à + makefile.ansi Makefile ¤Î¿÷·¿¥Õ¥¡¥¤¥ë (ANSI ¥³¥ó¥Ñ¥¤¥éÍÑ) + makefile.unix Makefile ¤Î¿÷·¿¥Õ¥¡¥¤¥ë (Èó ANSI ¥³¥ó¥Ñ¥¤¥éÍÑ) + + +¢£¥½¡¼¥¹¥Õ¥¡¥¤¥ë + + ¡ü x86 SIMD extension ¤Ç¿·¤¿¤ËÄɲ䵤줿¥Õ¥¡¥¤¥ë + + jccolmmx.asm RGB->YCbCr ¿§¶õ´ÖÊÑ´¹ (MMX) + jccolss2.asm RGB->YCbCr ¿§¶õ´ÖÊÑ´¹ (SSE2) + jcsammmx.asm ¥À¥¦¥ó¥µ¥ó¥×¥ê¥ó¥° (MMX) + jcsamss2.asm ¥À¥¦¥ó¥µ¥ó¥×¥ê¥ó¥° (SSE2) + + jdcolmmx.asm YCbCr->RGB ¿§¶õ´ÖÊÑ´¹ (MMX) + jdcolss2.asm YCbCr->RGB ¿§¶õ´ÖÊÑ´¹ (SSE2) + jdsammmx.asm ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥° (MMX) + jdsamss2.asm ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥° (SSE2) + jdmermmx.asm ¿§¶õ´ÖÊÑ´¹¡¿¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°Åý¹ç (MMX) + jdmerss2.asm ¿§¶õ´ÖÊÑ´¹¡¿¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°Åý¹ç (SSE2) + + jcqntint.asm ¥Ç¡¼¥¿ÊÑ´¹¤ÈÎ̻Ҳ½ (ÈóSIMD, À°¿ô) + jcqntflt.asm ¥Ç¡¼¥¿ÊÑ´¹¤ÈÎ̻Ҳ½ (ÈóSIMD, ÉâÆ°¾®¿ôÅÀ) + jcqntmmx.asm ¥Ç¡¼¥¿ÊÑ´¹¤ÈÎ̻Ҳ½ (MMX, À°¿ô) + jcqnts2i.asm ¥Ç¡¼¥¿ÊÑ´¹¤ÈÎ̻Ҳ½ (SSE2, À°¿ô) + jcqnt3dn.asm ¥Ç¡¼¥¿ÊÑ´¹¤ÈÎ̻Ҳ½ (3DNow! & MMX, ÉâÆ°¾®¿ôÅÀ) + jcqntsse.asm ¥Ç¡¼¥¿ÊÑ´¹¤ÈÎ̻Ҳ½ (SSE & MMX, ÉâÆ°¾®¿ôÅÀ) + jcqnts2f.asm ¥Ç¡¼¥¿ÊÑ´¹¤ÈÎ̻Ҳ½ (SSE & SSE2, ÉâÆ°¾®¿ôÅÀ) + + jfdctint.asm ¹âÀºÅÙÀ°¿ô(½çÊý¸þ)DCT (ÈóSIMD) + jfmmxint.asm ¹âÀºÅÙÀ°¿ô(½çÊý¸þ)DCT (MMX) + jfss2int.asm ¹âÀºÅÙÀ°¿ô(½çÊý¸þ)DCT (SSE2) + jfdctfst.asm ¹â®À°¿ô(½çÊý¸þ)DCT (ÈóSIMD) + jfmmxfst.asm ¹â®À°¿ô(½çÊý¸þ)DCT (MMX) + jfss2fst.asm ¹â®À°¿ô(½çÊý¸þ)DCT (SSE2) + jfdctflt.asm ÉâÆ°¾®¿ôÅÀ(½çÊý¸þ)DCT (ÈóSIMD) + jf3dnflt.asm ÉâÆ°¾®¿ôÅÀ(½çÊý¸þ)DCT (3DNow!) 
+ jfsseflt.asm ÉâÆ°¾®¿ôÅÀ(½çÊý¸þ)DCT (SSE) + + jidctint.asm ¹âÀºÅÙÀ°¿ô(µÕÊý¸þ)DCT (ÈóSIMD) + jimmxint.asm ¹âÀºÅÙÀ°¿ô(µÕÊý¸þ)DCT (MMX) + jiss2int.asm ¹âÀºÅÙÀ°¿ô(µÕÊý¸þ)DCT (SSE2) + jidctfst.asm ¹â®À°¿ô(µÕÊý¸þ)DCT (ÈóSIMD) + jimmxfst.asm ¹â®À°¿ô(µÕÊý¸þ)DCT (MMX) + jiss2fst.asm ¹â®À°¿ô(µÕÊý¸þ)DCT (SSE2) + jidctflt.asm ÉâÆ°¾®¿ôÅÀ(µÕÊý¸þ)DCT (ÈóSIMD) + ji3dnflt.asm ÉâÆ°¾®¿ôÅÀ(µÕÊý¸þ)DCT (3DNow! & MMX) + jisseflt.asm ÉâÆ°¾®¿ôÅÀ(µÕÊý¸þ)DCT (SSE & MMX) + jiss2flt.asm ÉâÆ°¾®¿ôÅÀ(µÕÊý¸þ)DCT (SSE & SSE2) + jidctred.asm ½Ì¾®Å¸³«ÍÑ(µÕÊý¸þ)DCT (ÈóSIMD) + jimmxred.asm ½Ì¾®Å¸³«ÍÑ(µÕÊý¸þ)DCT (MMX) + jiss2red.asm ½Ì¾®Å¸³«ÍÑ(µÕÊý¸þ)DCT (SSE2) + + jsimdcpu.asm CPU ¤Î SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯ + jsimddjg.asm OS ¤Î SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯ (for DJGPP V.2) + jsimdw32.asm OS ¤Î SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯ (for Win32) + jsimdgcc.c OS ¤Î SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯ (for gcc) + + makecfg.c ¥¢¥»¥ó¥Ö¥ê¸À¸ìÍÑÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òºîÀ®¤¹¤ë + + jsimdext.inc ¥¢¥»¥ó¥Ö¥ê¸À¸ì¥½¡¼¥¹ÍѤζ¦Ḁ̈إåÀ¥Õ¥¡¥¤¥ë + jdct.inc DCT ´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë + jcolsamp.inc ¿§¶õ´ÖÊÑ´¹¡¿¥µ¥ó¥×¥ê¥ó¥°´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë + + jcolsamp.h ¿§¶õ´ÖÊÑ´¹¡¿¥µ¥ó¥×¥ê¥ó¥°´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë + ¥ª¥ê¥¸¥Ê¥ëÈǤˤϸºß¤·¤Ê¤¤£Ã¸À¸ì¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë¤Ç¡¢ + SIMD ²½¤ÇƳÆþ¤µ¤ì¤¿´Ø¿ô¤ÎÀë¸À¤¬µ­½Ò¤µ¤ì¤Æ¤¤¤Þ¤¹¡£ + + jpegdll.def DLL ÈÇ JPEG Library ÍѤδؿô¥¨¥¯¥¹¥Ý¡¼¥ÈÄêµÁ¥Õ¥¡¥¤¥ë + jpegdll.rc DLL ÈÇ JPEG Library ÍѤΥС¼¥¸¥ç¥ó¥ê¥½¡¼¥¹ÄêµÁ¥Õ¥¡¥¤¥ë + ¤³¤ì¤é¤Î¥Õ¥¡¥¤¥ë¤Ï¡¢IJG JPEG Library ¤ò¤½¤Î¤Þ¤Þ DLL ¤Ë + ¤¹¤ë¾ì¹ç¤Ë»ÈÍѤ·¤Þ¤¹(makefile.vcdll/makefile.mgwdll)¡£ + + ¡ü x86 SIMD extension ¤Ç²þÊѤ¬²Ã¤¨¤é¤ì¤¿¥Õ¥¡¥¤¥ë + + Êѹ¹ÆâÍÆ¤Ë¤Ä¤¤¤ÆÆÃ¤Ëµ­½Ò¤Î¤Ê¤¤¥Õ¥¡¥¤¥ë¤Ë¤Ï¡¢SIMD ²½¤Ç¿·¤¿¤ËƳÆþ¤µ¤ì¤¿ + ´Ø¿ô¤ÎÀë¸À¤ä¤½¤Î¸Æ¤Ó½Ð¤·µ­½Ò¡¢SIMD ²½¤Ë´ØÏ¢¤¹¤ë¥Þ¥¯¥íÄêµÁ¤Ê¤É¤¬Äɲà + ¤µ¤ì¤Æ¤¤¤Þ¤¹¡£ + + jpeglib.h JPEG ¥é¥¤¥Ö¥é¥ê¤Î¥á¥¤¥ó¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë + jpegint.h JPEG ¥é¥¤¥Ö¥é¥ê¤ÎÆâÉôÍѥإåÀ¥Õ¥¡¥¤¥ë + jmorecfg.h JPEG ¥é¥¤¥Ö¥é¥ê¤Î¾ÜºÙÀßÄê¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë + + jdct.h DCT ´ØÏ¢¥Õ¥¡¥¤¥ëÍѤΥإåÀ¥Õ¥¡¥¤¥ë + SIMD ½èÍý¤ËŬ¤¹¤ë¤è¤¦¤Ë¡¢´ö¤Ä¤«¤ÎÊÑ¿ô¤Î·¿¤âÊѹ¹¤µ¤ì¤Æ + ¤¤¤Þ¤¹¡£ + + jcdctmgr.c ½çÊý¸þDCT¤Î¥Þ¥Í¡¼¥¸¥á¥ó¥È½èÍý + 
jddctmgr.c µÕÊý¸þDCT¤Î¥Þ¥Í¡¼¥¸¥á¥ó¥È½èÍý + + jccolor.c RGB->YCbCr ¿§¶õ´ÖÊÑ´¹ (ÈóSIMD) + jdcolor.c YCbCr->RGB ¿§¶õ´ÖÊÑ´¹ (ÈóSIMD) + jdmerge.c ¿§¶õ´ÖÊÑ´¹¡¿¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°Åý¹ç (ÈóSIMD) + jcsample.c ¥À¥¦¥ó¥µ¥ó¥×¥ê¥ó¥° (ÈóSIMD) + jdsample.c ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥° (ÈóSIMD) + jdsample.c ¤Ë¤Ï¡¢ÈóSIMDÈǤΠh1v2 ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°´Ø¿ô + (h1v2_upsample, h1v2_fancy_upsample) ¤âÄɲäµ¤ì¤Æ¤¤¤Þ¤¹¡£ + + jdhuff.h ¥Ï¥Õ¥Þ¥óÉ乿¥Ç¥³¡¼¥É½èÍý (¥Ø¥Ã¥À) + jdhuff.c ¥Ï¥Õ¥Þ¥óÉ乿¥Ç¥³¡¼¥É½èÍý (¥·¡¼¥±¥ó¥·¥ã¥ë) + jdphuff.c ¥Ï¥Õ¥Þ¥óÉ乿¥Ç¥³¡¼¥É½èÍý (¥×¥í¥°¥ì¥Ã¥·¥Ö) + ¤³¤ì¤é¤Î£³¤Ä¤Î¥Õ¥¡¥¤¥ë¤ÎÊѹ¹ÅÀ¤Ï¡¢SIMD ²½¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ + ¥Ç¥³¡¼¥É½èÍý¤ÎÊýË¡¤ò¸úΨ²½¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£ + + jdcoefct.c DCT¥Ç¡¼¥¿¥Ö¥í¥Ã¥¯¤Î¥Þ¥Í¡¼¥¸¥á¥ó¥È + SIMD ²½¤È¤Ï´Ø·¸¤Ê¤¯¡¢°ìÉô¤Î¥³¡¼¥É¤ò¸úΨ²½¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£ + + jcomapi.c °µ½Ì/Ÿ³« ¶¦ÄÌ API ´Ø¿ôÄêµÁ + SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯´Ø¿ô¤Ê¤É¤¬Äɲäµ¤ì¤Æ¤¤¤Þ¤¹¡£ + + jmemmgr.c JPEG library ÍÑ¥á¥â¥ê¥Þ¥Í¡¼¥¸¥ã (¥á¥¤¥ó) + SIMD ²½¤Ëȼ¤¤¡¢16¥Ð¥¤¥È¥¢¥É¥ì¥¹¶­³¦¤Ë¹ç¤Ã¤¿¥á¥â¥êÎΰè¤ò + ¾ï¤Ë16¥Ð¥¤¥Èñ°Ì¤Ç³ÎÊݤ¹¤ë¤è¤¦¤ËÊѹ¹¤·¤Æ¤¢¤ê¤Þ¤¹¡£ + + cjpeg.c JPEG °µ½ÌÍÑ ¥³¥Þ¥ó¥É¥é¥¤¥ó¡¦¥æ¡¼¥Æ¥£¥ê¥Æ¥£ + djpeg.c JPEG Ÿ³«ÍÑ ¥³¥Þ¥ó¥É¥é¥¤¥ó¡¦¥æ¡¼¥Æ¥£¥ê¥Æ¥£ + -v ¥ª¥×¥·¥ç¥ó¤Ç¤Î SIMD ´ØÏ¢¾ðÊó¤Îɽ¼¨¤ä¡¢-nosimd ¤Ê¤É¤Î + ¥ª¥×¥·¥ç¥ó¥¹¥¤¥Ã¥Á¤¬Äɲäµ¤ì¤Æ¤¤¤Þ¤¹¡£ + + rdbmp.c BMP ¥Õ¥¡¥¤¥ëÆÉ¤ß¹þ¤ß¥â¥¸¥å¡¼¥ë + wrbmp.c BMP ¥Õ¥¡¥¤¥ë½ñ¤­½Ð¤·¥â¥¸¥å¡¼¥ë + SIMD ²½¤È¤Ï´Ø·¸¤Ê¤¯¡¢°ìÉô¤Î¥³¡¼¥É¤ò¸úΨ²½¤µ¤»¤Æ¤¢¤ê¤Þ¤¹¡£ + + rdgif.c GIF ¥Õ¥¡¥¤¥ëÆÉ¤ß¹þ¤ß¥â¥¸¥å¡¼¥ë(version 6a) + wrgif.c GIF ¥Õ¥¡¥¤¥ë½ñ¤­½Ð¤·¥â¥¸¥å¡¼¥ë(version 6a) + Unisys ¤Î GIF (LZW) ÆÃµö¼º¸ú¤Ëȼ¤¤¡¢version 6a ¤Ç¥µ¥Ý¡¼¥È + ¤µ¤ì¤Æ¤¤¤¿ cjpeg/djpeg ¤Ç¤Î GIF ¤ÎÆþ½ÐÎϤòÉü³è¤µ¤»¤Þ¤·¤¿¡£ + ¤³¤Î GIF ¥â¥¸¥å¡¼¥ë¤Ï version 6a ¤Î¤â¤Î¤òήÍѤ·¤Æ¤¤¤Þ¤¹¡£ + version 6b ¤Î GIF ¥â¥¸¥å¡¼¥ë¤Ï unused/ ¤Ë¤¢¤ê¤Þ¤¹¡£ + + +¢£Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg (altui/) + + altui/ ¤Ë¤¢¤ë¥Õ¥¡¥¤¥ë¤Ï¡¢¸µ¡¹ jpegaltui.v6b.tar.gz ¤È¤¤¤¦¥Õ¥¡¥¤¥ë̾¤Ç + Ê̤ËÇÛÉÛ¤µ¤ì¤Æ¤¤¤¿¤â¤Î¤Ç¤¹¡£¤³¤Î SIMD ÈǤǤϡ¢£±¥Õ¥¡¥¤¥ëÈÇ cjpeg/djpeg + ¤ÈƱÍͤΠSIMD Âбþ²½¤Ë´Ø¤¹¤ë½¤Àµ¤È¡¢Borland C++ / Microsoft VC++ ¤Ë + 
¤ª¤¤¤Æ¥ï¡¼¥ë¥É¥«¡¼¥ÉŸ³«½èÍý¤òÍ­¸ú²½¤¹¤ë¤¿¤á¤Î¥³¡¼¥É¤ò½ñ¤­²Ã¤¨¤¿¤â¤Î¤Ç¤¹¡£ + + altui/cjpeg.c Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg + altui/djpeg.c Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠdjpeg + altui/README.alt jpegaltui.v6b.tar.gz ¤ËƱº­¤µ¤ì¤Æ¤¤¤¿ README + altui/usage.alt Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈÇ cjpeg/djpeg ¤Î¥Þ¥Ë¥å¥¢¥ë(º¹Ê¬) + + +¢£SIMD ÈǤǤϻȤï¤ì¤Ê¤¤¥Õ¥¡¥¤¥ë·² (unused/) + + unused/ ¤Ë¤¢¤ë¥Õ¥¡¥¤¥ë¤Ï¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG software ¤Ë¼ýÏ¿¤µ¤ì¤Æ + ¤¤¤¿¤¬¡¢¤³¤Î SIMD ÈǤǤϻȤï¤ì¤Ê¤¤/»È¤¨¤Ê¤¤¥Õ¥¡¥¤¥ë·²¤¬¼ý¤á¤é¤ì¤Æ¤¤¤Þ¤¹¡£ + + unused/j?dct???.c ¥ª¥ê¥¸¥Ê¥ë¤Î£Ã¸À¸ìÈÇ DCT ´Ø¿ô + unused/jmem*.* ¥·¥¹¥Æ¥à°Í¸¥á¥â¥ê¥Þ¥Í¡¼¥¸¥ã(for MS-DOS/Macintosh) + unused/??gif.c version 6b ¤Î GIF ¥â¥¸¥å¡¼¥ë + unused/jconfig.* ¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤Î jconfig.* + unused/mak*.* ¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤Î Makefile.* + + + +[EOF] diff --git a/simd_install.ja.txt b/simd_install.ja.txt new file mode 100644 index 0000000..ef8f825 --- /dev/null +++ b/simd_install.ja.txt @@ -0,0 +1,436 @@ +Independent JPEG Group's JPEG software release 6b + with x86 SIMD extension for IJG JPEG library version 1.02 + == INSTALL == +----------------------------------------------------------- + +¢£¤³¤Î¥Õ¥¡¥¤¥ë¤Ï + + ¤³¤Î¥Õ¥¡¥¤¥ë¤Ç¤Ï¡¢SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý¤ò²òÀâ + ¤·¤Þ¤¹¡£¾¡¼ê¤Ê¤¬¤é¡¢¤³¤³¤Ç¤Ï¥ª¥ê¥¸¥Ê¥ëÈǤΠlibjpeg ¥é¥¤¥Ö¥é¥ê¤Î°·¤¤Êý + (¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý¡¿¥×¥í¥°¥é¥à¤ÎÃæ¤Ç¤Î»È¤¤Êý)¤ò¤¢¤ëÄøÅÙ¿´ÆÀ¤Æ¤¤¤ë¤È¤¤¤¦ + ¿Í¤òÂоݤˤµ¤»¤Æ¤¤¤¿¤À¤­¤Þ¤¹¡£¥ª¥ê¥¸¥Ê¥ëÈǤλÈÍÑË¡¤Ë¤Ä¤¤¤Æ¤Ï¡¢ + install.doc (±Ñʸ) ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£ + + ¢£¥¢¥»¥ó¥Ö¥é NASM ¤ÎÆþ¼ê¡¿¥¤¥ó¥¹¥È¡¼¥ë + ¢£¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý + ¡ü Microsoft Visual C++ 6.0 °Ê¹ß¤ÎÅý¹ç³«È¯´Ä¶­(DevStudio)¤Î¾ì¹ç + ¡ü jconfig.h ¤È Makefile ¤òÁªÂò¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë + ¡ü UNIX ´Ä¶­¤Ç configure ¥¹¥¯¥ê¥×¥È¤ò»È¤¦ + ¢£Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg (altui/) + ¢£¥³¡¼¥É¥µ¥¤¥º¤ò¸º¤é¤¹¤Ë¤Ï + ¢£ÆÃÄê¤Î SIMD Ì¿Îá¤ò»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë¤¹¤ë¤Ë¤Ï + + +¢£¥¢¥»¥ó¥Ö¥é NASM ¤ÎÆþ¼ê¡¿¥¤¥ó¥¹¥È¡¼¥ë + + ¤³¤Î x86 SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î SIMD ³ÈÄ¥Éôʬ¤Ï¡¢¤½¤Î¤Û¤È¤ó¤É¤¬ + x86 ¤Î¥¢¥»¥ó¥Ö¥ê¸À¸ì¤Ç½ñ¤«¤ì¤Æ¤¤¤Þ¤¹¡£¤³¤Î¥¢¥»¥ó¥Ö¥ê¸À¸ì¥½¡¼¥¹¥³¡¼¥É¤ò + ¥¢¥»¥ó¥Ö¥ë¤¹¤ë¤Ë¤Ï¡¢NASM 
(Netwide Assembler) ¤È¤¤¤¦¥¢¥»¥ó¥Ö¥é¤¬É¬ÍפǤ¹¡£ + Microsoft ¤Î MASM ¤ä¤½¤Î¸ß´¹¥¢¥»¥ó¥Ö¥é¤Ç¤Ï°·¤¨¤Þ¤»¤ó¤Î¤ÇÃí°Õ¤·¤Æ¤¯¤À¤µ¤¤¡£ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NASM (Netwide Assembler) ¤Ï¡¢¸ø¼°¥µ¥¤¥È http://nasm.sourceforge.net/ or + http://sourceforge.net/project/showfiles.php?group_id=6208 ¤«¤é¥À¥¦¥ó + ¥í¡¼¥É¤Ç¤­¤Þ¤¹¡£Ver.0.98.25 °Ê¹ß¤ÎÈæ³ÓŪ¿·¤·¤¤¥Ð¡¼¥¸¥ç¥ó¤Î¤â¤Î¤¬É¬ÍפǤ¹¡£ + ¸½»þÅÀ(2006/02)¤Ç¤ÎºÇ¿·ÈÇ¤Ï Ver.0.98.39 ¤Ç¤¹¡£ + + ¤Ê¤ª¡¢x86 ÈǤΠDarwin ¤ä Mac OS X ¤Ê¤É¤Ç»ÈÍѤ¹¤ë¾ì¹ç¤Ï¡¢¸½»þÅÀ¤Ç¤Ï¤Þ¤À + Àµ¼°¤Ë¥ê¥ê¡¼¥¹¤µ¤ì¤Æ¤¤¤Ê¤¤ Ver.0.98.40 °Ê¹ß¤Î¥Ð¡¼¥¸¥ç¥ó¤Î¤â¤Î¤¬É¬ÍפǤ¹¡£ + ¸½»þÅÀ¤Ç¤Ï¡¢Ver.0.98.40 ¤Ï¾åµ­¤Î¸ø¼°¥µ¥¤¥È¤Î CVS ¥ê¥Ý¥¸¥È¥ê¤«¤é¥½¡¼¥¹ + ¥³¡¼¥É¤ò¥À¥¦¥ó¥í¡¼¥É¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¡¿¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£ + + Windows ·Ï¤Î¾ì¹ç¤Ï¡¢nasm-0.XX.YY-win32.zip (XX.YY ¤Ë¤Ï¥Ð¡¼¥¸¥ç¥óÈֹ椬Æþ¤ë) + ¤È¤¤¤¦Ì¾Á°¤Î¥Õ¥¡¥¤¥ë¤ò¥À¥¦¥ó¥í¡¼¥É¤·¤Æ¡¢¤½¤ì¤Ë´Þ¤Þ¤ì¤ë nasmw.exe ¤ò + £Ã¥³¥ó¥Ñ¥¤¥é¤Î¼Â¹Ô¥Õ¥¡¥¤¥ë·²¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ë¾ì½ê¤Ë¥³¥Ô¡¼¤·¤Þ¤¹¡£ + + ³Æ¼ï PC-UNIX ¤Î¾ì¹ç¤Ï¡¢OS ¤ÎÇÛÉÛ¸µ¤Ë¤Æ°Ü¿¢ºÑ¤ß¥Ñ¥Ã¥±¡¼¥¸¤¬Ä󶡤µ¤ì¤Æ¤¤¤ë + ¾ì¹ç¤¬¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢¤Þ¤ººÇ½é¤Ë¤½¤Á¤é¤ò³Îǧ¤·¤Æ¤ß¤Æ¤¯¤À¤µ¤¤¡£¤½¤ì¤¬¤Ê¤¤ + ¾ì¹ç¤Ï¡¢¾åµ­¸ø¼°¥µ¥¤¥È¤«¤é¥½¡¼¥¹¥³¡¼¥É(nasm-0.XX.YY.tar.gz)¤ò¥À¥¦¥ó¥í¡¼¥É + ¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¡¿¥¤¥ó¥¹¥È¡¼¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£linux ¤Î¾ì¹ç¤Ç rpm ¥Ñ¥Ã¥±¡¼¥¸¤ò + °·¤¨¤ë¥·¥¹¥Æ¥à¤Î¾ì¹ç¤Ï¡¢¾åµ­¸ø¼°¥µ¥¤¥È¤Ë¤Æ rpm ¥Ð¥¤¥Ê¥ê¥Ñ¥Ã¥±¡¼¥¸¤âÆþ¼ê + ¤Ç¤­¤Þ¤¹¡£ + + Ãí°ÕÅÀ¤È¤·¤Æ¡¢YASM (http://www.tortall.net/projects/yasm/) ¤Ï»È¤ï¤Ê¤¤¤Ç + ¤¯¤À¤µ¤¤¡£YASM ¤Ï NASM ¸ß´¹¤òëð¤Ã¤Æ¤¤¤Þ¤¹¤¬¡¢¸½ºß¤Î¥Ð¡¼¥¸¥ç¥ó(0.4.0)¤Ç¤Ï + ¤Þ¤À¸ß´¹ÅÙ¤¬Ä㤤¾å¤Ë¥Ð¥°¤¬¤¢¤ë(¥¢¥É¥ì¥¹·×»»¤¬¤Þ¤Ã¤¿¤¯¥Ç¥¿¥é¥á¤Ê¥³¡¼¥É¤ò + À¸À®¤·¤Æ¤¤¤ë)¤¿¤á¡¢ÅöÊý¤Î¥Æ¥¹¥È¤Ç¤Ï YASM ¤Ç¥¢¥»¥ó¥Ö¥ë¤·¤¿¥³¡¼¥É¤Ï¤Þ¤Ã¤¿¤¯ + ư¤­¤Þ¤»¤ó¤Ç¤·¤¿¡£¡ÊÃí¡§¤³¤Î x86 SIMD extension for IJG JPEG library + ¤Ç¤Ï¡¢¤¿¤È¤¨ YASM ¤òÍѤ¤¤Æ¤â AMD64 ¤Î 64bit Âбþ¤Ë¤Ï¤Ê¤ê¤Þ¤»¤ó¡£¡Ë + + +¢£¥³¥ó¥Ñ¥¤¥ë¤Î»ÅÊý + + ¤Û¤È¤ó¤É¤ÎÉôʬ¤Ç¥ª¥ê¥¸¥Ê¥ëÈǤÈÊѤï¤ê¤¢¤ê¤Þ¤»¤ó¤Î¤Ç¡¢°Ê²¼¤ÎÀâÌÀ¤Ç¤Ï¡¢ + ¤³¤Î SIMD ³ÈÄ¥ÈÇ¤ËÆÃÍ­¤ÎÃí°ÕÅÀ¤òÃæ¿´¤Ë½Ò¤Ù¤Þ¤¹¡£ + + + ¡ü Microsoft Visual C++ 6.0 
°Ê¹ß¤ÎÅý¹ç³«È¯´Ä¶­(DevStudio)¤Î¾ì¹ç + + ¤³¤ÎÇÛÉÛ¥»¥Ã¥È¤Ë¤Ï¡¢Microsoft Visual C++ 6.0 ÍÑ¤Î¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë + ¤¬ÉÕ°¤·¤Æ¤¤¤Þ¤¹¡£V6.0 °Ê¹ß¤Î VC++ ¤Î¾ì¹ç¤Ï¡¢VC++ 6.0 ¤Î¥Õ¥¡¥¤¥ë¤ò + ÊÑ´¹(¥¤¥ó¥Ý¡¼¥È)¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£Microsoft Visual C++ 2005 Express + Edition ¤Ë¤Æ¡¢¥¤¥ó¥Ý¡¼¥È¡¿¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£ + + ÊýË¡¤Ï¡¢¤Þ¤º vc6proj ¥Õ¥©¥ë¥À¤ÎÃæ¤Ë¤¢¤ë¥Õ¥¡¥¤¥ë¤ò¡¢¥½¡¼¥¹¥Õ¥¡¥¤¥ë·² + (*.c) ¤Î¤¢¤ë¥Õ¥©¥ë¥À¤Ë¤¹¤Ù¤Æ°Üư¤·¤Þ¤¹¡£¤½¤·¤Æ libjpeg.dsw ¤ò³«¤¤¤Æ + (¤â¤·¤¯¤Ï¥¤¥ó¥Ý¡¼¥È¤·¤Æ)¡¢¥á¥Ë¥å¡¼¤Î ¥Ó¥ë¥É¢ª¥Ð¥Ã¥Á¥Ó¥ë¥É ¤Ç¤¹¤Ù¤Æ¤Ë + ¥Á¥§¥Ã¥¯¤òÆþ¤ì¤Æ¥Ó¥ë¥É¤¹¤ì¤Ð£Ï£Ë¤Ç¤¹¡£ + + ¥ï¡¼¥¯¥¹¥Ú¡¼¥¹ libjpeg.dsw ¤Ë¤Ï¡¢°Ê²¼¤Î¥×¥í¥¸¥§¥¯¥È¤¬¼ý¤á¤é¤ì¤Æ¤¤¤Þ¤¹¡£ + + ¢¡ makecfg.dsp + + ¤³¤Î¥×¥í¥¸¥§¥¯¥È¤Ï¡¢makecfg.c ¤ò¥³¥ó¥Ñ¥¤¥ë¡¿¥ê¥ó¥¯¤·¤Æ¡¢¥«¥¹¥¿¥à + ¥Ó¥ë¥É¥¹¥Æ¥Ã¥×¤Ç¤½¤ì(makecfg.exe)¤ò¼Â¹Ô¤·¡¢libjpeg.dsp ¤Î¥Ó¥ë¥É¤Ë + ɬÍפÊÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òºî¤ëƯ¤­¤ò¤·¤Æ¤¤¤Þ¤¹¡£ + + ¤³¤ì¤Ï¡¢libjpeg.dsp ¤¬¥³¥ó¥Ñ¥¤¥ë¤µ¤ì¤ëÁ°¤Ëɬ¤º¥³¥ó¥Ñ¥¤¥ë(¼Â¹Ô) + ¤µ¤ì¤Ê¤±¤ì¤Ð¤Ê¤ê¤Þ¤»¤ó¡£ + + ¢¡ libjpeg.dsp + + JPEG ¥é¥¤¥Ö¥é¥ê libjpeg.lib ¤ò¥Ó¥ë¥É¤·¤Þ¤¹¡£makecfg.dsp ¤¬½ÐÎϤ¹¤ë + ÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤¬É¬ÍפǤ¹¡£ + + ¢¡ cjpeg.dsp, djpeg.dsp, jpegtran.dsp, rdjpgcom.dsp, wrjpgcom.dsp + + IJG JPEG library ¤ËÉÕ°¤·¤Æ¤¤¤ë¥µ¥ó¥×¥ë¡¦¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤Ç¤¹¡£ + cjpeg ¤Ï JPEG °µ½Ì¤ò¡¢djpeg ¤Ï JPEG Ÿ³«¤ò¡¢jpegtran ¤Ï JPEG + ¥Õ¥¡¥¤¥ë¤ÎÊÑ´¹¤ò¤·¤Þ¤¹¡£rdjpgcom ¤È wrjpgcom ¤Ï¡¢JPEG¥Õ¥¡¥¤¥ë + Ãæ¤Î¥Æ¥­¥¹¥È¥³¥á¥ó¥Èʸ¤òÁàºî¤·¤Þ¤¹¡£ + + ¢¡ apptest.dsp + + ¤Ç¤­¤¢¤¬¤Ã¤¿ cjpeg, djpeg, jpegtran ¤ËÂФ·¤Æ´Êñ¤Êưºî¥Æ¥¹¥È¤ò + ¤·¤Þ¤¹(make test ¤ÈƱÅù)¡£¤Þ¤º¡¢¤³¤ì¤é¤Î¥½¥Õ¥È¤Ç¼ÂºÝ¤Ë²èÁü¥Õ¥¡¥¤¥ë + ¤òÊÑ´¹¤µ¤»¡¢¤½¤Î½ÐÎϤò fc.exe ¤ÇÈæ³Ó¤·¤Þ¤¹¡£"FC: Áê°ãÅÀ¤Ï¸¡½Ð + ¤µ¤ì¤Þ¤»¤ó¤Ç¤·¤¿" ¤È¤¤¤¦¥á¥Ã¥»¡¼¥¸¤¬£¶²óɽ¼¨¤µ¤ì¤ì¤Ð¡¢¥×¥í¥°¥é¥à¤Ï + Àµ¤·¤¯Æ°¤¤¤Æ¤¤¤Þ¤¹¡£ + + JPEG ¥é¥¤¥Ö¥é¥ê libjpeg.lib ¤ò¾¤Î¥½¥Õ¥È¤Ë¥ê¥ó¥¯¤¹¤ë¤¿¤á¤Ë libjpeg.dsp + ¤ò¾¤Î¥ï¡¼¥¯¥¹¥Ú¡¼¥¹¡¿¥½¥ê¥å¡¼¥·¥ç¥ó¤Ë´Þ¤á¤ë¾ì¹ç¤Ï¡¢makecfg.dsp ¤â + ɬ¤ºÆ±¤¸¥ï¡¼¥¯¥¹¥Ú¡¼¥¹¡¿¥½¥ê¥å¡¼¥·¥ç¥ó¤Ë´Þ¤á¡¢¾ï¤Ë makecfg.dsp ¤¬ + libjpeg.dsp ¤è¤ê¤âÀè¤Ë¥Ó¥ë¥É¤µ¤ì¤ë¤è¤¦¤Ë¡¢°Í¸´Ø·¸¤òÀßÄꤷ¤Æ¤¯¤À¤µ¤¤¡£ + 
¤³¤ì¤Ï¡¢Àè¤Ë½Ò¤Ù¤¿¤è¤¦¤Ë¡¢libjpeg.dsp ¤Ï makecfg.dsp ¤¬½ÐÎϤ¹¤ëÀßÄê + ¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤òɬÍפȤ·¤Æ¤¤¤ë¤¿¤á¤Ç¤¹¡£ + + ÀßÄêË¡¤Ï¡¢(VC++ 6.0¤Î¾ì¹ç) ¥á¥Ë¥å¡¼¤Î ¥×¥í¥¸¥§¥¯¥È ¢ª °Í¸´Ø·¸ ¤Ç + libjpeg ¤òÁªÂò¤·¡¢²¼¤Î°ìÍ÷¤ÎÃæ¤Î makecfg ¤Ë¥Á¥§¥Ã¥¯¤òÆþ¤ì¤Þ¤¹¡£ + + + ¡ü jconfig.h ¤È Makefile ¤òÁªÂò¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë + + Windows ·Ï¤Î¥³¥ó¥Ñ¥¤¥é¤Î¾ì¹ç¤Ç¡¢¥³¥Þ¥ó¥É¥é¥¤¥ó¤«¤é¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¾ì¹ç + ¤Ï¡¢¤³¤ÎÊýË¡¤ò¤È¤Ã¤Æ¤¯¤À¤µ¤¤¡£ + + ÊýË¡¤Ï¡¢ÉÕ°¤Î jconfig.* ¤È Makefile.* ¤ÎÃæ¤«¤éŬÀڤʤâ¤Î¤ò¤½¤ì¤¾¤ì + °ì¤Ä¤º¤ÄÁª¤Ó¡¢¤½¤ì¤¾¤ì jconfig.h ¤È Makefile ¤Ë̾Á°¤òÊѤ¨¤Þ¤¹¡£ + ¤½¤·¤Æ¡¢¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Î¤¢¤ë¥Ç¥£¥ì¥¯¥È¥ê(¥Õ¥©¥ë¥À)¤ò¥«¥ì¥ó¥È¥Ç¥£¥ì + ¥¯¥È¥ê¤Ë¤·¤Æ¡¢¥³¥Þ¥ó¥É¥é¥¤¥ó¤Ç make (VC++ ¤Î¾ì¹ç¤Ï nmake)¤È¥¿¥¤¥× + ¤¹¤ì¤Ð£Ï£Ë¤Ç¤¹¡£¸å¤Ï¼«Æ°Åª¤Ë¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤È¡¢¥µ¥ó¥×¥ë¥¢¥×¥ê + ¥±¡¼¥·¥ç¥ó(cjpeg ¤ä djpeg ¤Ê¤É)¤¬¥³¥ó¥Ñ¥¤¥ë¤µ¤ì¤Þ¤¹¡£ + + ÉÕ°¤·¤Æ¤¤¤ë jconfig.* ¤È Makefile.* ¤Ï¡¢°Ê²¼¤Î½èÍý·Ï¤ËÂбþ¤·¤Æ¤¤¤Þ¤¹¡£ + ¤Ê¤ª¡¢¥ª¥ê¥¸¥Ê¥ëÈǤËÉÕ°¤·¤Æ¤¤¤ë jconfig.* ¤È Makefile.* ¤Ï¡¢¤³¤Î + SIMD ÂбþÈǤǤϻÈÍѤǤ­¤Þ¤»¤ó(¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Î¹½À®¤¬ÊѤï¤Ã¤Æ¤¤¤ë¤¿¤á)¡£ + + ¡¦ jconfig.dj & makefile.dj -- DJGPP v2.0 or later + ¡¦ jconfig.bc5 & makefile.bc5 -- Borland C++ Compiler 5.5 (win32) + ¡¦ jconfig.mgw & makefile.mgw, makefile.mgwdll -- MinGW + ¡¦ jconfig.vc & makefile.vc , makefile.vcdll -- VC++ 6.0 or later + ¡¦ jconfig.linux & makefile.linux -- linux + + ¤³¤ì¤é°Ê³°¤Î¥³¥ó¥Ñ¥¤¥é¤ËÂбþ¤µ¤»¤ë¾ì¹ç¤Ï¡¢install.doc ¤Ë½ñ¤¤¤Æ¤¢¤ë + ¤è¤¦¤Ë¡¢¤Þ¤º ckconfig.c ¤ò¥³¥ó¥Ñ¥¤¥ë¡¿¼Â¹Ô¤·¤Æ jconfig.h ¤òÀ¸À®¤·¡¢ + makefile.unix ¤â¤·¤¯¤Ï makefile.ansi ¤ò¼êºî¶È¤ÇÊÔ½¸¤·¤Æ Makefile ¤ò + ºîÀ®¤·¤Æ¤¯¤À¤µ¤¤¡£ + + °Ê²¼¡¢³Æ¥³¥ó¥Ñ¥¤¥é¤Ë¸ÇÍ­¤ÎÃí°ÕÅÀ¤ò½Ò¤Ù¤Þ¤¹¡£ + + ¢¡ jconfig.dj & makefile.dj -- DJGPP v2.0 or later. 
+ + ²Äǽ¤Ê¸Â¤ê¿·¤·¤¤¥Ð¡¼¥¸¥ç¥ó¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£DJGPP 2.03 + gcc 3.4.4 + + binutils 2.16.1 ¤È¤¤¤¦ÁȤ߹ç¤ï¤»¤Çưºî¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£ + + makefile.dj ¤Ë¤Ï¡¢gcc 3.4.x ¸þ¤±¤Î¥³¥ó¥Ñ¥¤¥ë¥ª¥×¥·¥ç¥ó¤¬½ñ¤«¤ì¤Æ + ¤¤¤Þ¤¹¤Î¤Ç¡¢gcc 3.4.x °Ê³°¤Ç¤¦¤Þ¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Ê¤¤¾ì¹ç¤Ï¡¢Å¬Åö¤Ë + ½ñ¤­Ä¾¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£ + + ¢¡ jconfig.bc5 & makefile.bc5 -- Borland C++ Compiler 5.5 (win32) + + ²¿¤ÎÌäÂê¤â¤Ê¤¯¥³¥ó¥Ñ¥¤¥ë¤Ï¤Ç¤­¤ë¤Ï¤º¤Ç¤¹¤¬¡¢¤³¤Î BCC 5.5 ¤Î¾ì¹ç¤Ï¡¢ + ¤Û¤È¤ó¤É¤Î¥±¡¼¥¹¤Ç SSE/SSE2 ¤¬Æ°ºî¤·¤Þ¤»¤ó¡£Íýͳ¤Ï¡¢BCC 5.5 ¤ËÉÕ°¤Î + ¥ê¥ó¥«(ilink32.exe)¤¬¸Å¤¯¡¢SSEÄê¿ô¤ò16¥Ð¥¤¥È¶­³¦¥¢¥É¥ì¥¹¤ËÇÛÃÖ¤¹¤ë + ¤³¤È¤¬¤Ç¤­¤Ê¤¤¤¿¤á¤Ç¤¹¡£ + + ¤½¤Î¤¿¤á¡¢BCC 5.5 ¤ò»È¤¦¾ì¹ç¤Ï SSE/SSE2 ¤Î¥µ¥Ý¡¼¥È¤òºï½ü(¸å½Ò)¤·¤Æ + ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤³¤È¤ò¤ª´«¤á¤¤¤¿¤·¤Þ¤¹¡£ + + ¢¡ jconfig.mgw & makefile.mgw, makefile.mgwdll -- MinGW + + ²Äǽ¤Ê¸Â¤ê¿·¤·¤¤¥Ð¡¼¥¸¥ç¥ó¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£gcc 3.4.4 + binutils + 2.16.91 ¤È¤¤¤¦ÁȤ߹ç¤ï¤»¤Çưºî¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£ + + ¤Þ¤¿¡¢GNU make ¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ë¤³¤È¤âɬÍפǤ¹¡£MinGW ¤Î¾ì¹ç¡¢ + (make.exe ¤Ç¤Ï¤Ê¤¯) mingw32-make.exe ¤È¤¤¤¦Ì¾Á°¤Ç¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì + ¤Þ¤¹¤Î¤Ç¡¢make ¤Ç¤Ï¤Ê¤¯ mingw32-make ¤È¥¿¥¤¥×¤¹¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£ + + ¤³¤ì¤é¤Î makefile ¤Ë¤Ï¡¢gcc 3.4.x ¸þ¤±¤Î¥³¥ó¥Ñ¥¤¥ë¥ª¥×¥·¥ç¥ó¤¬½ñ¤« + ¤ì¤Æ¤¤¤Þ¤¹¤Î¤Ç¡¢gcc 3.4.x °Ê³°¤Ç¤¦¤Þ¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Ê¤¤¾ì¹ç¤Ï¡¢ + ŬÅö¤Ë½ñ¤­Ä¾¤·¤Æ»È¤Ã¤Æ¤¯¤À¤µ¤¤¡£ + + makefile.mgw ¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤òÀÅۥ饤¥Ö¥é¥ê(libjpeg.a)¤Ë + ¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î libjpeg.a ¤ò¥ê¥ó¥¯¤·¤¿ cjpeg, djpeg, + jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£makefile.mgwdll ¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤ò + DLL(jpeg62.dll)¤Ë¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î jpeg62.dll ¤ò¥ê¥ó¥¯¤·¤¿ + cjpeg, djpeg, jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£ + + ¢¡ jconfig.vc & makefile.vc , makefile.vcdll -- VC++ 6.0 or later + + VC++ 6.0 °Ê¹ß¤Ê¤é²¿¤ÎÌäÂê¤â¤Ê¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë¤Ï¤º¤Ç¤¹¡£¥Õ¥ê¡¼¤Ç + ¸ø³«¤µ¤ì¤Æ¤¤¤ë Microsoft Visual C++ Toolkit 2003 ¤Ç¤â¡¢nmake.exe + ¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ì¤Ð¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Þ¤¹¡£ + + makefile.vc ¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤òÀÅۥ饤¥Ö¥é¥ê(libjpeg.lib)¤Ë + ¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î libjpeg.lib ¤ò¥ê¥ó¥¯¤·¤¿ cjpeg, djpeg, + jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£makefile.vcdll 
¤Ï¡¢libjpeg ¥é¥¤¥Ö¥é¥ê¤ò + DLL(jpeg62.dll)¤Ë¥³¥ó¥Ñ¥¤¥ë¤·¤¿¾å¤Ç¡¢¤³¤Î jpeg62.dll ¤ò¥ê¥ó¥¯¤·¤¿ + cjpeg, djpeg, jpegtran ¤òºîÀ®¤·¤Þ¤¹¡£ + + ¢¡ jconfig.linux & makefile.linux -- linux + + ³Æ¼ï¤Î linux ¤ËÂбþ¤·¤Þ¤¹¤¬¡¢linux ¤Î¾ì¹ç¤Ï configure ¥¹¥¯¥ê¥×¥È¤ò + »È¤¦¤³¤È¤ò¶¯¤¯¤ª´«¤á¤¤¤¿¤·¤Þ¤¹¡£ + + + ¡ü UNIX ´Ä¶­¤Ç configure ¥¹¥¯¥ê¥×¥È¤ò»È¤¦ + + ³Æ¼ï¤Î PC-UNIX ¤Î¾ì¹ç¤Ê¤É¡¢¥·¥§¥ë¡¦¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ë¤³¤È¤Î¤Ç¤­¤ë + ´Ä¶­¤Î¾ì¹ç¤Ï¡¢configure ¥¹¥¯¥ê¥×¥È¤ò»È¤¦¤È´Êñ¤Ë¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Þ¤¹¡£ + + $ ./configure --enable-shared --enable-static + $ make + $ make test (¢«Æ°ºî¥Æ¥¹¥È¡¨É¬Íפ˱þ¤¸¤Æ) + # make install + + ¤³¤ÎÊýË¡¤Ç¤Ï¡¢°Ê²¼¤Î¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ç¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë¤³¤È¤ò³Îǧ¤·¤Æ + ¤¤¤Þ¤¹¡£¤³¤ì°Ê³°¤Î UNIX ·Ï OS ¤Ç¤â¥³¥ó¥Ñ¥¤¥ë¤Ï²Äǽ¤À¤È»×¤ï¤ì¤Þ¤¹¤¬¡¢ + ¼ã´³¤Î¼êľ¤·¤¬É¬Íפˤʤë¾ì¹ç¤â¤¢¤ë¤«¤È»×¤¤¤Þ¤¹¡£ + + ¡¦Vine Linux 2.6r4 (gcc 2.95.3) ¤ª¤è¤Ó Vine Linux 3.2 (gcc 3.4.4) + ¡¦Fedora core 3 (gcc 3.4.2) ¤ª¤è¤Ó Fedora core 4 (gcc 4.0.0) + ¡¦FreeBSD 5.4 (gcc 3.4.2) ¤ª¤è¤Ó FreeBSD 6.0 (gcc 3.4.4) + ¡¦NetBSD 2.0 (gcc 3.3.3) ¤ª¤è¤Ó NetBSD 3.0 (gcc 3.3.3) + ¡¦Solaris 10 1/06 for x64/x86 (i386-pc-solaris2.10) + ¡¦Darwin 8.0.1 for x86 (i386-apple-darwin8.0.1; gcc 3.3) + ¡¦MinGW & MSYS (gcc 3.4.4) + ¡¦cygwin (gcc 3.4.4) + + Ʊº­¤Î configure ¥¹¥¯¥ê¥×¥È¤Ï GNU autoconf ¤ÇÀ¸À®¤µ¤ì¤¿¤â¤Î¤Ç¤¹¡£ + ¤³¤Î configure ¥¹¥¯¥ê¥×¥È¤Ï¡¢°ìÈÌŪ¤Ê configure ¥¹¥¯¥ê¥×¥È¤¬Ç§¼±¤¹¤ë + ¥¹¥¤¥Ã¥Á¥ª¥×¥·¥ç¥ó¤Î¾¤Ë¡¢°Ê²¼¤Î¥¹¥¤¥Ã¥Á¤òǧ¼±¤·¤Þ¤¹¡£ + + ¡û --enable-shared / --enable-static + + --enable-shared ¤ò»ØÄꤹ¤ë¤È¡¢GNU libtool ¤ò»È¤Ã¤Æ¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò + ºîÀ®¤·¤Þ¤¹¡£--enable-static ¤ò»ØÄꤹ¤ë¤È¡¢Æ±¤¸¤¯ GNU libtool ¤ò + »È¤Ã¤ÆÀÅۥ饤¥Ö¥é¥ê¤òºîÀ®¤·¤Þ¤¹¡£Î¾Êý¤ò»ØÄꤹ¤ë¤È¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê + ¤ÈÀÅۥ饤¥Ö¥é¥ê¤ÎξÊý¤òºîÀ®¤·¤Þ¤¹¡£µÕ¤Ë¤³¤ÎξÊý¤È¤â»ØÄꤷ¤Ê¤¤¤È¡¢ + GNU libtool ¤ò»È¤ï¤º¤ËÀÅۥ饤¥Ö¥é¥ê¤Î¤ß¤òºîÀ®¤·¤Þ¤¹¡£ + + ¥·¥¹¥Æ¥à¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë¥é¥¤¥Ö¥é¥ê¤òºîÀ®¤¹¤ë¾ì¹ç¤ÏξÊý¤ò»ØÄꤹ¤ë + ɬÍפ¬¤¢¤ë¤Ç¤·¤ç¤¦¡£ + + ¡û --disable-mmx / --disable-3dnow / --disable-sse / --disable-sse2 + + ÆÃÄê¤Î SIMD Ì¿Î᥻¥Ã¥È¤Î¥µ¥Ý¡¼¥È(¥³¡¼¥É)¤òºï½ü¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¤·¤Þ¤¹¡£ + + ¡û 
--enable-uchar-boolean + + ¥½¡¼¥¹¥³¡¼¥ÉÃæ¤Î bool ·¿¤ÎÄêµÁ¤ò int ·¿¤Ç¤Ï¤Ê¤¯ unsigned char ·¿¤Ë + Êѹ¹¤·¤Þ¤¹¡£¤³¤Î¥ª¥×¥·¥ç¥ó¤ÏÄ̾ï¤Ï(ÆÃ¤ËɬÍפǤʤ¤¸Â¤ê)»ÈÍѤ·¤Ê¤¤¤Ç + ¤¯¤À¤µ¤¤¡£¤³¤ì¤Ï¡¢MinGW ¤Ê¤É¤Î Windows ¾å¤Î UNIX ´Ä¶­¤Ë¤Æ¡¢bool ·¿ + ¤ÎÄêµÁ¤ò Windows ¤Î½¬´·¤Ë¹ç¤ï¤»¤ë¤¿¤á¤ËÍѰդµ¤ì¤Æ¤¤¤ë¤â¤Î¤Ç¤¹¡£ + + °Ê²¼¡¢³Æ¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë¸ÇÍ­¤ÎÃí°ÕÅÀ¤ò½Ò¤Ù¤Þ¤¹¡£ + + ¢¡ ³Æ¼ï linux ¥Ç¥£¥¹¥È¥ê¥Ó¥å¡¼¥·¥ç¥ó + + ¤è¤Û¤É¸Å¤¤¤â¤Î¤Ç¤Ê¤¤¸Â¤ê¡¢¾åµ­¤Î¼ê½ç¤Ç²¿¤ÎÌäÂê¤â¤Ê¤¯¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë + ¤Ï¤º¤Ç¤¹¡£ + + ¾åµ­¤Î¼ê½ç¤Ç¶¦Í­¥é¥¤¥Ö¥é¥ê¤òºîÀ®¤·¤¿¾ì¹ç¡¢¤½¤Î¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤Ï + 62.1.0 (¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.62.1.0) ¤È¤Ê¤ê¤Þ¤¹¡£¤³¤ì¤ÏÁ°½Ò¤·¤¿ + ¤È¤ª¤ê¡¢¥ª¥ê¥¸¥Ê¥ëÈÇ(¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.62.0.0)¤È¥Ð¥¤¥Ê¥ê¥ì¥Ù¥ë + ¤Ç¤Î¾å°Ì¸ß´¹À­¤¬¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢¥ª¥ê¥¸¥Ê¥ëÈǤȤ½¤Î¤Þ¤ÞÃÖ¤­´¹¤¨¤ë¤³¤È + ¤¬¤Ç¤­¤Þ¤¹¡£ + + rpm ¤ò»È¤Ã¤¿¥Ñ¥Ã¥±¡¼¥¸´ÉÍý¤òºÎÍѤ·¤Æ¤¤¤ë¥Ç¥£¥¹¥È¥ê¥Ó¥å¡¼¥·¥ç¥ó¤Ç¤Ï¡¢ + Ʊº­¤Î spec ¥Õ¥¡¥¤¥ë (libjpeg.spec) ¤â¤´ÍøÍѤ¤¤¿¤À¤±¤Þ¤¹¡£¤³¤ì¤Ï¡¢ + Vine Linux 3.2 ¤ª¤è¤Ó Fedora core 4 ¤Ç¤Îưºî¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£ + + ¢¡ xBSD ¥Õ¥¡¥ß¥ê (FreeBSD/NetBSD/OpenBSD) + + ºÇ¶á¤Î¥Ð¡¼¥¸¥ç¥ó¤Î FreeBSD ¤È NetBSD ¤Ë´Ø¤·¤Æ¤Ï¡¢¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤ë + ¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£OpenBSD ¤Ë´Ø¤·¤Æ¤âÌäÂê¤Ï¤Ê¤¤¤È»×¤¤¤Þ¤¹¡£ + ¤¿¤À¡¢¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥©¡¼¥Þ¥Ã¥È¤Ë a.out ¤ò»È¤Ã¤¿¸Å¤¤¤â¤Î¤Ë´Ø¤·¤Æ¤Ï¡¢ + ¥Õ¥¡¥¤¥ë¥Õ¥©¡¼¥Þ¥Ã¥È¤Î¼ïÎà¤â°ì±þ configure ¥¹¥¯¥ê¥×¥È¤Ë¤Æ¸¡½Ð¤Ç¤­¤ë + ¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¤¬¡¢Æ°ºî¤Ï̤³Îǧ¤Ç¤¹¡£ + + FreeBSD ¤Î¾ì¹ç¡¢¾åµ­¤Î¼ê½ç¤Ç¶¦Í­¥é¥¤¥Ö¥é¥ê¤òºîÀ®¤·¤¿¾ì¹ç¡¢¤½¤Î + ¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤Ï 9 (¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.9) ¤È¤Ê¤ê¤Þ¤¹¡£¤³¤ì¤Ï¡¢ + ports collection ¤ÎÃæ¤Ë¤¢¤ë¸ø¼°ÈǤΥС¼¥¸¥ç¥óÈÖ¹æ¤Ë½à¤¸¤¿¤â¤Î¤Ç¡¢ + (¥Ð¥¤¥Ê¥ê¾å°Ì¸ß´¹¤Ê¤Î¤Ç)¸ø¼°ÈǤȤ½¤Î¤Þ¤ÞÃÖ¤­´¹¤¨¤ë¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£ + NetBSD/OpenBSD ¤Î¾ì¹ç¤Î¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤Ï 62.1.0 (¥Õ¥¡¥¤¥ë̾¡§ + libjpeg.so.62.1.0) ¤Ë¤Ê¤ê¤Þ¤¹(¥Ð¥¤¥Ê¥ê¾å°Ì¸ß´¹)¡£ + + ¢¡ Solaris 10 + + ºî¼Ô¤Î¥Æ¥¹¥È¤Ç¤Ï¡¢Àµ¾ï¤Ë¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Æ¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤¬¥Ð¥¤¥Ê¥ê + ¸ß´¹¤Ë¤Ê¤ë¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹(¥Õ¥¡¥¤¥ë̾¡§libjpeg.so.62.1.0)¡£ + ¤Ç¤¹¤¬¡¢¤³¤Î SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Ï AMD64 ¤Ë¤Ï¸½»þÅÀ¤Ç¤ÏÂбþ + ¤·¤Æ¤¤¤Ê¤¤¤¿¤á¡¢32bitÈǤΥ饤¥Ö¥é¥ê¤·¤«ºî¤ì¤Þ¤»¤ó¡£ + + ¢¡ Darwin for x86 + + 
ºî¼Ô¤Î¥Æ¥¹¥È¤Ç¤Ï¡¢Àµ¾ï¤Ë¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Æ¡¢¶¦Í­¥é¥¤¥Ö¥é¥ê¤¬¥Ð¥¤¥Ê¥ê + ¸ß´¹¤Ë¤Ê¤ë¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹(¥Õ¥¡¥¤¥ë̾¡§libjpeg.62.1.0.dylib)¡£ + x86 ÈÇ Mac OS X ¤Ç¤â¡¢¥³¥ó¥Ñ¥¤¥ë¤µ¤¨¤Ç¤­¤ì¤Ðưºî¤¹¤ë¤â¤Î¤È»×¤ï¤ì¤Þ¤¹¡£ + + ¸½»þÅÀ¤Ç¤Ï¡¢¥¢¥»¥ó¥Ö¥é nasm ¤Î Darwin ¤Ø¤Î¥µ¥Ý¡¼¥È¤¬½½Ê¬¤Ç¤Ê¤¤¤¿¤á¡¢ + ¤ä¤ä¥È¥ê¥Ã¥­¡¼¤Ê¥³¡¼¥É¤Ç Darwin / Mac OS X ¤ËÂбþ¤µ¤»¤Æ¤¤¤Þ¤¹¤¬¡¢ + ưºî¤Ë¤Ï¤Þ¤Ã¤¿¤¯ÌäÂê¤Ê¤¤¤Ï¤º¤Ç¤¹¡£ + + ¢¡ MinGW & MSYS (gcc 3.4.4) + + MinGW ¤Î¾ì¹ç¤Ï¡¢ÉÕ°¤Î makefile.mgw / makefile.mgwdll ¤ò»È¤¦¤³¤È¤ò + ¿ä¾©¤·¤Þ¤¹¤¬¡¢MSYS ¤¬¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ì¤Æ¤¤¤ì¤Ð configure ¥¹¥¯¥ê¥×¥È + ¤â»È¤¨¤Þ¤¹¡£¤³¤Î¾ì¹ç¤Ï¡¢configure ¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ëºÝ¤Ë + --enable-uchar-boolean ¤òɬ¤º»ØÄꤷ¤Æ¤¯¤À¤µ¤¤¡£¤³¤¦¤¹¤ë¤³¤È¤Ç¡¢ + ¾¤Î Windows ·Ï¤Î½èÍý·Ï(VC++¤Ê¤É)¤¬½ÐÎϤ¹¤ë¥³¡¼¥É¤È¥Ð¥¤¥Ê¥ê¸ß´¹¤Ë + ¤Ê¤ê¤Þ¤¹¡£ + + ¢¡ cygwin (gcc 3.4.4) + + ¤Þ¤ºÃí°Õ¤¹¤Ù¤­ÅÀ¤Ï¡¢cygwin ¤Î¾ì¹ç¡¢cygwin ¤«¤é¸ø¼°¤Ë¥ê¥ê¡¼¥¹¤µ¤ì¤Æ + ¤¤¤ë DLL (cygjpeg-62.dll) ¤È¤Ï¥Ð¥¤¥Ê¥ê¸ß´¹¤Ë¤Ï¤Ê¤ê¤Þ¤»¤ó¡£¤³¤ì¤Ï¡¢ + ¸ø¼°ÈǤΥХ¤¥Ê¥ê¤Ë¤Ï lossless jpeg patch (ljpeg-6b.tar.gz) ¤È¤¤¤¦ + ½¤Àµ¥Ñ¥Ã¥Á¤¬´Þ¤Þ¤ì¤Æ¤¤¤ë¤¿¤á¤Ç¡¢ÅöSIMDÈǤËÂФ·¤Æ¤³¤Î¥Ñ¥Ã¥Á¤òŬÍÑ + ¤¹¤ë¤³¤È¤Ïº¤Æñ¤À¤«¤é¤Ç¤¹¡£ + + ¥Ð¥¤¥Ê¥ê¸ß´¹¤Ç¤Ï¤Ê¤¤¤¿¤á¡¢¸ø¼°¥ê¥ê¡¼¥¹ÈǤΠDLL ¤ò¤³¤ÎSIMDÈǤÇÃÖ¤­ + ´¹¤¨¤ë¤³¤È¤Ï¤Ç¤­¤Þ¤»¤ó¡£¤½¤Î¤¿¤áÅöSIMDÈǤΠDLL ¤Ï cygjpeg-162.dll + ¤È¤¤¤¦Ì¾Á°¤Ë¤Ê¤ë¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¡£¤³¤ì¤ò¥·¥¹¥Æ¥à¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë + ¤³¤È¤â¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤ÎSIMDÈǤΠDLL ¤òÍøÍѤ¹¤ë¤Ë¤Ï¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤ò + »ÈÍѤ·¤Æ¤¤¤ë¥½¥Õ¥È¤òºÆ¥³¥ó¥Ñ¥¤¥ë¡¿ºÆ¥ê¥ó¥¯¤¹¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£ + + ¤Ê¤ª¡¢¤³¤Î DLL ¤Ë¤Ä¤±¤é¤ì¤ë¥Ð¡¼¥¸¥ç¥óÈÖ¹æ¤òÊѤ¨¤¿¤±¤ì¤Ð¡¢configure + ¥¹¥¯¥ê¥×¥È¤òÁö¤é¤»¤ëÁ°¤Ë config.ver ¤ÎÆâÍÆ¤òÊѹ¹¤·¤Æ¤¯¤À¤µ¤¤¡£ + + +¢£Ê£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg (altui/) + + ¥µ¥ó¥×¥ë¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤Î cjpeg ¤È djpeg ¤Ë¤Ä¤¤¤Æ¤Ç¤¹¤¬¡¢¥Ç¥Õ¥©¥ë¥È¤Î + ¾õÂ֤ǥ³¥ó¥Ñ¥¤¥ë¤µ¤ì¤ë¤â¤Î(¾¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë·²¤ÈƱ¤¸¾ì½ê¤Ë¤¢¤ë cjpeg.c + ¤È djpeg.c) ¤Ï¡¢°ìÅ٤˰ì¤Ä¤Î¥Õ¥¡¥¤¥ë¤·¤«ÊÑ´¹¤Ç¤­¤Ê¤¤¤â¤Î¤Ç¤¹¡£¤Ä¤Þ¤ê¡¢ + ÆþÎÏ¥Õ¥¡¥¤¥ë¤Ï¥³¥Þ¥ó¥É¥é¥¤¥ó¾å¤Ë°ì¤Ä¤·¤«»ØÄê¤Ç¤­¤º¡¢½ÐÎϤÏɸ½à½ÐÎϤ« + -outfile ¥ª¥×¥·¥ç¥ó¤Ç»ØÄꤷ¤¿¥Õ¥¡¥¤¥ë¤Ë½ñ¤­½Ð¤µ¤ì¤Þ¤¹¡£³Æ¼ï¤Î UNIX ·Ï + OS ¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤µ¤ìÍøÍѤµ¤ì¤Æ¤¤¤ë cjpeg/djpeg 
¤Ï¤³¤Î¥¿¥¤¥×¤Î¤â¤Î¤Ç¤¹¡£ + + °ìÊý¡¢IJG ¤«¤é¸ø¼°¤Ë¥ê¥ê¡¼¥¹¤µ¤ì¤Æ¤¤¤ë MS-DOS ÈǤΠcjpeg/djpeg + (ftp://ftp.simtel.net/.2/simtelnet/msdos/graphics/jpeg6_b.zip) ¤Ç¤Ï¡¢ + Ê£¿ô¤ÎÆþÎÏ¥Õ¥¡¥¤¥ë¤ò»ØÄê¤Ç¤­¡¢½ÐÎÏ¤ÏÆþÎÏ¥Õ¥¡¥¤¥ë¤ÈƱ¤¸¾ì½ê¤Ë¼«Æ°Åª¤Ë + ºî¤é¤ì¤Þ¤¹¡£¤³¤Î¥¿¥¤¥×¤Î cjpeg/djpeg ¤òºî¤ê¤¿¤±¤ì¤Ð¡¢altui/ ¤ÎÃæ¤Ë¤¢¤ë + cjpeg.c ¤È djpeg.c ¤ò¡¢¸µ¤«¤é¤¢¤ë(£±¥Õ¥¡¥¤¥ëÈǤÎ) cjpeg.c / djpeg.c ¤È + Æþ¤ìÂØ¤¨¤Æ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£¤³¤Î altui ÈǤΠcjpeg.c / djpeg.c ¤Ï¡¢ + ¸µ¡¹ jpegaltui.v6b.tar.gz ¤È¤¤¤¦¥Õ¥¡¥¤¥ë̾¤ÇÇÛÉÛ¤µ¤ì¤Æ¤¤¤¿¤â¤Î¤ËÂФ·¤Æ + SIMD Âбþ²½¤Ë´Ø¤¹¤ë½¤Àµ¤ò²Ã¤¨¤¿¤â¤Î¤Ç¤¹¡£ + + ¤³¤ÎÊ£¿ô¥Õ¥¡¥¤¥ëÂбþÈǤΠcjpeg/djpeg ¤ò Windows ·Ï¤Ê¤É¤ÎÈó UNIX ´Ä¶­¤Ç + »ÈÍѤ¹¤ë¾ì¹ç¡¢ÆþÎÏ¥Õ¥¡¥¤¥ë̾¤ò¥ï¥¤¥ë¥É¥«¡¼¥É¤Ç»ØÄê¤Ç¤­¤ë¤è¤¦¤Ë¤¹¤ë¤Ë¤Ï¡¢ + ³Æ¥³¥ó¥Ñ¥¤¥é¤Ë¸ÇÍ­¤ÎÆÃÊ̤ÊÀßÄ꤬ɬÍפˤʤë¾ì¹ç¤¬¤¢¤ê¤Þ¤¹¡£¤Ê¤¼¤Ê¤é¡¢ + MS-DOS·Ï¡¿Windows·Ï¤Î´Ä¶­¤Ç¤Ï°ìÈ̤ˡ¢¥ï¥¤¥ë¥É¥«¡¼¥É¤ÎŸ³«½èÍý¤Ï¥³¥ó¥Ñ¥¤¥é + ¤ËÉÕ°¤Î¥¹¥¿¡¼¥È¥¢¥Ã¥×¥³¡¼¥ÉÆâ¤Ç¹Ô¤Ê¤ï¤ì¤ë¤¿¤á¤Ç¤¹¡£ + + MinGW ¤ä DJGPP V.2 ¤Ê¤É¤Î¾ì¹ç¤Ï¡¢¥ï¥¤¥ë¥É¥«¡¼¥É¤ÎŸ³«½èÍý¤ÏºÇ½é¤«¤éÍ­¸ú + ¤Ë¤Ê¤Ã¤Æ¤¤¤ë¤¿¤á¡¢ÆÃÊ̤ʤ³¤È¤ò¤·¤Ê¤¯¤Æ¤â¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë»ØÄê + ¤Ï¤Ç¤­¤Þ¤¹¡£Microsoft Visual C++ ¤ä Borland C++ ¤Î¾ì¹ç¤Ï¡¢ÉáÄÌ¡¢¥ï¥¤¥ë¥É + ¥«¡¼¥ÉŸ³«¤òÍ­¸ú²½¤¹¤ë¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤Ç¤¢¤ë setargv.obj ¤ä + wildargs.obj ¤ò EXE ¥Õ¥¡¥¤¥ë¤Î¥ê¥ó¥¯»þ¤Ë¾¤Î¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤È°ì½ï¤Ë + ¥ê¥ó¥¯¤¹¤ë¤³¤È¤Ç¡¢¥ï¥¤¥ë¥É¥«¡¼¥ÉŸ³«¤òÍ­¸ú²½¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤Î SIMD ÈÇ + cjpeg/djpeg ¤Î¾ì¹ç¤Ï¡¢setargv.obj ¤ä wildargs.obj ¤ÎÆâÍÆ¤ËÁêÅö¤¹¤ë¥³¡¼¥É¤ò + cjpeg.c/djpeg.c ¤ËľÀܽñ¤­¹þ¤ó¤Ç¤¢¤ë¤¿¤á¡¢¥×¥í¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤ä Makefile + ¤Ë¾åµ­¤Î¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤òÄɲ䷤ʤ¯¤Æ¤â¡¢¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë + »ØÄ꤬¤Ç¤­¤ë¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¡£¤³¤ì¤é°Ê³°¤Î£Ã¥³¥ó¥Ñ¥¤¥é¤ò»ÈÍѤ·¤¿¾ì¹ç¤Ç¡¢ + ¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë»ØÄ꤬¤Ç¤­¤Ê¤¤¾ì¹ç¤Ï¡¢¥³¥ó¥Ñ¥¤¥é¤Î¥Þ¥Ë¥å¥¢¥ë + ¤ò»²¾È¤·¤Æ¡¢¥ï¥¤¥ë¥É¥«¡¼¥ÉŸ³«¤òÍ­¸ú²½¤¹¤ëÀßÄê¤Ç¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + + °ìÊý¡¢linux ¤Ê¤É¤Î UNIX ´Ä¶­¤Ç¤Ï¡¢¥ï¥¤¥ë¥É¥«¡¼¥É¤ÎŸ³«½èÍý¤Ï¥×¥í¥°¥é¥à¤¬ + µ¯Æ°¤µ¤ì¤ëÁ°¤Ë¥³¥Þ¥ó¥É¥·¥§¥ë¤Ë¤è¤Ã¤Æ¹Ô¤Ê¤ï¤ì¤ë¤¿¤á¡¢¥³¥ó¥Ñ¥¤¥é¤ÎÀßÄê¤Ê¤É + ¤ÏɬÍפ¢¤ê¤Þ¤»¤ó¡£¥ï¥¤¥ë¥É¥«¡¼¥É¤Ë¤è¤ë¥Õ¥¡¥¤¥ë̾»ØÄê¤Ï¾ï¤Ë»È¤¨¤Þ¤¹¡£ + + 
+¢£¥³¡¼¥É¥µ¥¤¥º¤ò¸º¤é¤¹¤Ë¤Ï + + SIMD ¥³¡¼¥É¤òÉղä·¤¿¤¿¤á¡¢¤½¤Îʬ¤À¤±¥³¡¼¥É¥µ¥¤¥º¤¬Áý¤¨¤Æ¤¤¤Þ¤¹¡£¤Ç¤¹¤¬¡¢ + JPEG library ¤ò°Ê²¼¤Î¤è¤¦¤Ê¥Ç¥Õ¥©¥ë¥È¤Î¾õÂ֤Ǿï¤Ë»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ê¤é¤Ð¡¢ + jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ(¥Þ¥¯¥í)¤ò°Ê²¼¤Î¤è¤¦¤ËÊѹ¹¤·¤Æ¥³¥ó¥Ñ¥¤¥ë + ¤¹¤ë¤³¤È¤Ç¡¢»ÈÍѤµ¤ì¤Ê¤¤¥³¡¼¥É¤ò½ü³°¤¹¤ë¤³¤È¤¬¤Ç¤­¡¢¥³¡¼¥É¥µ¥¤¥º¤ò¸º¤é¤¹ + ¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£ + + ¡û cinfo.dct_method ¤ÎÃͤòÊѹ¹¤·¤Æ¤¤¤Ê¤¤¾ì¹ç + + ¤³¤ÎÊÑ¿ô¤Ï DCT±é»»¤ÎÊýË¡¤ò»ØÄꤷ¡¢cjpeg/djpeg ¤Ç¤Ï -dct ¥ª¥×¥·¥ç¥ó¤Ë + Âбþ¤·¤Þ¤¹¡£¤³¤ÎÊÑ¿ô¤ÎÃͤϥǥե©¥ë¥È¤Ç¤Ï JDCT_ISLOW ¤Ç¡¢ÆÃ¤ËÍýͳ¤Î + ¤Ê¤¤¸Â¤ê¤³¤Î¥Ç¥Õ¥©¥ë¥È¤Î¾õÂ֤ǻÈÍѤ¹¤ë¤³¤È¤ò¶¯¤¯¿ä¾©¤·¤Þ¤¹¡£°Ê²¼¤Î + ¥Þ¥¯¥í¤ò #undef ¤Ë¤¹¤ë¤³¤È¤Ç¡¢JDCT_ISLOW ¤Î¾õÂ֤ǤϷ褷¤Æ»ÈÍѤµ¤ì¤Ê¤¤ + ¥³¡¼¥É¤ò½ü³°¤Ç¤­¤Þ¤¹¡£ + + #define DCT_IFAST_SUPPORTED -> #undef DCT_IFAST_SUPPORTED + #define DCT_FLOAT_SUPPORTED -> #undef DCT_FLOAT_SUPPORTED + + ¤³¤ì¤À¤±¤Ç¤â¤«¤Ê¤ê¤Î¥³¡¼¥É¥µ¥¤¥º¤¬ºï¸º¤Ç¤­¤Þ¤¹¡£ÆÃ¤Ë DCT_FLOAT_SUPPORTED + ¤ò #undef ¤Ë¤¹¤ë¤È¡¢3DNow! ¤È SSE ¤Î¥µ¥Ý¡¼¥È¤â¼«Æ°Åª¤Ë̵¸ú¤Ë¤Ê¤ê¤Þ¤¹¡£ + + ¡ûŸ³«½èÍý¤Ç cinfo.do_fancy_upsampling ¤ÎÃͤòÊѹ¹¤·¤Æ¤¤¤Ê¤¤¾ì¹ç + + ¤³¤ÎÊÑ¿ô¤Ï djpeg ¤Ç¤Ï -nosmooth ¥ª¥×¥·¥ç¥ó¤ËÁêÅö¤·¡¢-nosmooth ¤ò»ØÄê + ¤¹¤ë¤È FALSE ¤ËÀßÄꤵ¤ì¤Þ¤¹¡£¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï TRUE ¤Ç¡¢¤³¤ì¤âÆÃ¤ËÍýͳ¤Î + ¤Ê¤¤¸Â¤ê¤³¤Î¥Ç¥Õ¥©¥ë¥È¤Î¾õÂ֤ǻÈÍѤ¹¤ë¤³¤È¤ò¶¯¤¯¿ä¾©¤·¤Þ¤¹¡£°Ê²¼¤Î + ¥Þ¥¯¥í¤ò #undef ¤Ë¤¹¤ë¤³¤È¤Ç¡¢TRUE ¤Î¾õÂ֤ǤϷ褷¤Æ»ÈÍѤµ¤ì¤Ê¤¤¥³¡¼¥É¤ò + ½ü³°¤Ç¤­¤Þ¤¹¡£ + + #define UPSAMPLE_MERGING_SUPPORTED -> #undef UPSAMPLE_MERGING_SUPPORTED + + ¡ûŸ³«½èÍý¤Ç cinfo.scale_num, cinfo.scale_denom ¤ÎÃͤòÊѹ¹¤·¤Æ¤¤¤Ê¤¤¾ì¹ç + + ¤³¤ì¤ÏÍפ¹¤ë¤Ë¡ÖJPEG½Ì¾®Å¸³«¡×¤Îµ¡Ç½¤Ç¡¢djpeg ¤Ç¤Ï -scale M/N ¥ª¥× + ¥·¥ç¥ó¤ËÁêÅö¤·¤Þ¤¹¡£¥µ¥à¥Í¥¤¥ëºîÀ®¤Ê¤É¤Î¾ì¹ç¤ËÍøÍѤµ¤ì¤ë¤³¤È¤¬Â¿¤¤ + µ¡Ç½¤Ç¤¹¤¬¡¢¤³¤ì¤ò¤Þ¤Ã¤¿¤¯»ÈÍѤ·¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ï¡¢°Ê²¼¤Î¥Þ¥¯¥í¤ò #undef + ¤Ë¤¹¤ë¤³¤È¤Ç¡¢¥³¡¼¥ÉÎ̤òºï¸º¤Ç¤­¤Þ¤¹¡£ + + #define IDCT_SCALING_SUPPORTED -> #undef IDCT_SCALING_SUPPORTED + + Ãí°ÕÅÀ¤È¤·¤Æ¡¢¤³¤ì¤é¤ÎÀßÄêÊÑ¿ô¤¬¤É¤Î¤è¤¦¤Ê¾õÂ֤ǻȤï¤ì¤ë¤«Í½Â¬¤Ç¤­¤Ê¤¤ + ¾ì¹ç¡¢¤¿¤È¤¨¤Ð¡¢¥·¥¹¥Æ¥à¤Ë¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë¶¦Í­¥é¥¤¥Ö¥é¥ê¤òºî¤ë¾ì¹ç¤Ê¤É + ¤Ï¡¢¤³¤¦¤¤¤Ã¤¿¥³¡¼¥Éºï¸º¤Ï¹Ô¤Ê¤¦¤Ù¤­¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£¥³¡¼¥Éºï¸º¤ò¹Ô¤Ê¤¦ + 
¤Î¤Ï¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤Î»È¤ï¤ìÊý¤¬¤è¤¯¤ï¤«¤Ã¤Æ¤¤¤ëÆÃÄê¤Î¥¢¥×¥ê¥±¡¼¥·¥ç¥ó + ¤Ë¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Î¤ß¤Ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + + +¢£ÆÃÄê¤Î SIMD Ì¿Îá¤ò»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë¤¹¤ë¤Ë¤Ï + + ¤³¤ì¤Ï jconfig.h ¤ÎÃæÄø¤Ë¤¢¤ë¡¢#undef JSIMD_***_NOT_SUPPORTED ¤È¤¤¤¦ + ¥Þ¥¯¥í¤ò #define ¤Ë¤¹¤ë¤³¤È¤Ç¼Â¸½¤Ç¤­¤Þ¤¹¡£configure ¥¹¥¯¥ê¥×¥È¤Ç + --disable-mmx ¤Ê¤É¤Î¥ª¥×¥·¥ç¥ó¤ò»ØÄꤷ¤¿¾ì¹ç¤Ï¡¢¤³¤Î¥Þ¥¯¥í¤Ï¼«Æ°Åª¤Ë + #define ¤µ¤ì¤Þ¤¹¡£ + + 3DNow! ¤È SSE ¤Ï¸µ¡¹¡¢ÉâÆ°¾®¿ôÅÀDCT¤Ë¤·¤«ÍøÍѤµ¤ì¤Æ¤¤¤Þ¤»¤ó¤Î¤Ç¡¢¾å½Ò¤Î + DCT_FLOAT_SUPPORTED ¤ò #undef ¤Ë¤·¤¿¤À¤±¤Ç¤Þ¤È¤á¤ÆÌµ¸ú¤Ë¤µ¤ì¤Þ¤¹¡£ + MMX ¤È SSE2 ¤Ï¡¢°µ½ÌŸ³«½èÍý¤Î³Æ½ê¤ËÍøÍѤµ¤ì¤Æ¤¤¤Æ¡¢¹â®²½¤Ø¤Î¹×¸¥ÅÙ¤¬ + ¹â¤¤¤Î¤Ç¡¢Í­¸ú¤Ë¤·¤Æ¤ª¤¯¤³¤È¤ò¤ªÁ¦¤á¤·¤Þ¤¹¤¬¡¢ÁȤ߹þ¤ßÍÑÅӤʤɡ¢¥³¡¼¥É + ¤òÁö¤é¤»¤ë¥×¥í¥»¥Ã¥µ¤Î¼ïÎब¤ï¤«¤Ã¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¡ÖÄ̾ïÍøÍѤµ¤ì¤Ê¤¤Êý¡× + ¤Î¥µ¥Ý¡¼¥È¤ò³°¤¹¤³¤È¤Ç¡¢¥³¡¼¥É¥µ¥¤¥º¤Îºï¸º¤¬¤Ç¤­¤Þ¤¹¡£ + + + +[EOF] diff --git a/simd_internal.ja.txt b/simd_internal.ja.txt new file mode 100644 index 0000000..d234901 --- /dev/null +++ b/simd_internal.ja.txt @@ -0,0 +1,293 @@ +Independent JPEG Group's JPEG software release 6b + with x86 SIMD extension for IJG JPEG library version 1.02 + == INTERNAL == +----------------------------------------------------------- + +¢£¤³¤Î¥Õ¥¡¥¤¥ë¤Ï + + ¤³¤Î¥Õ¥¡¥¤¥ë¤Ç¤Ï¡¢SIMD ÈÇ libjpeg ¥é¥¤¥Ö¥é¥ê¤Î¡¢SIMD ³ÈÄ¥Éôʬ¤Î¾ÜºÙ¤ò + ²òÀ⤷¤Þ¤¹¡£SIMD ³ÈÄ¥Éôʬ¤ËÂФ·¤Æ²¿¤é¤«¤Î¼ê¤ò²Ã¤¨¤¿¤¤¾ì¹ç¤ä¡¢É¸½à¤Ç¤Ï + Âбþ¤·¤Æ¤¤¤Ê¤¤¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤ËÂбþ¤µ¤»¤¿¤¤¾ì¹ç¤Ê¤É¤Ï¡¢¤³¤³¤òÆÉ¤ó¤Ç + ¤¯¤À¤µ¤¤¡£ + + ¢£¥Õ¥¡¥¤¥ë¥Õ¥©¡¼¥Þ¥Ã¥È¡¿¸Æ¤Ó½Ð¤·µ¬Ìó(ABI)¤Î»ØÄê + ¢£OS ¤Î SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯ (jsimdgcc.c / jsimddjg.asm / jsimdw32.asm) + ¢£¥¢¥»¥ó¥Ö¥ê¸À¸ìÍÑÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤ÎºîÀ® (makecfg.c) + ¢£SIMD Ì¿Îá¤Î¼Â¹Ô»þ¤ÎÁªÂò¡¿SIMD ưºî¥â¡¼¥É¾ðÊó + ¢£¤½¤Î¤Û¤«¤ÎÀßÄê¹àÌÜ¥Þ¥¯¥í + ¡û RGB_RED / RGB_GREEN / RGB_BLUE / RGB_PIXELSIZE + ¡û RGBX_FILLER_0XFF + ¡û JFDCT_INT_QUANTIZE_WITH_DIVISION + ¡û UPSAMPLE_H1V2_SUPPORTED + + +¢£¥Õ¥¡¥¤¥ë¥Õ¥©¡¼¥Þ¥Ã¥È¡¿¸Æ¤Ó½Ð¤·µ¬Ìó(ABI)¤Î»ØÄê + + ¥¢¥»¥ó¥Ö¥ê¸À¸ì¤Ç½ñ¤«¤ì¤¿¥³¡¼¥É¤ò¡¢£Ã¸À¸ì¤Ê¤É¤Î¹âµé¸À¸ì¤Ç½ñ¤«¤ì¤¿¥³¡¼¥É + 
¤È¥ê¥ó¥¯¤¹¤ë¤Ë¤Ï¡¢¥ª¥Ö¥¸¥§¥¯¥È¥Õ¥¡¥¤¥ë¤Î¥Õ¥©¡¼¥Þ¥Ã¥È¤ò°ìÃפµ¤»¤ë¤³¤È¡¢ + ¤ª¤è¤Ó¡¢¸Æ¤Ó½Ð¤·µ¬Ìó¤Ê¤É¤Î¥Ð¥¤¥Ê¥ê¥³¡¼¥É¤Îµ¬Ìó(ABI)¤ò°ìÃפµ¤»¤ë¤³¤È¤¬ + ɬÍפǤ¹¡£¤³¤Î¥½¥Õ¥È¤Ç¤Ï¡¢¤³¤ì¤é¤Î»ØÄê¤ò¡¢¥¢¥»¥ó¥Ö¥é nasm ¤ËÍ¿¤¨¤ë¥ª¥× + ¥·¥ç¥ó¤Ç»ØÄꤷ¤Æ¤¤¤Þ¤¹¡£ + + ¡¦nasm -fwin32 -DWIN32 ... + + Win32 ¤Î coff ¥Õ¥©¡¼¥Þ¥Ã¥È¡£Microsoft Visual C++ ¤ä MinGW¡¦CygWin + ¤Ê¤É¡¢Win32 ¥³¥ó¥Ñ¥¤¥é¤ÎÂçÉôʬ¤¬³ºÅö¡£ + + ¡¦nasm -fobj -DOBJ32 ... + + Win32 ¤Î obj ¥Õ¥©¡¼¥Þ¥Ã¥È¡£¸µ¡¹¤Ï MS-DOS ¤Ç»È¤ï¤ì¤Æ¤¤¤¿ obj ·Á¼° + (MSOMF)¤ò 32bit ¤Ë³ÈÄ¥¤·¤¿¤â¤Î¡£Borland C++ Complier (Win32) ¤Ê¤É¡£ + + ¡¦nasm -felf -DELF ... + + ³Æ¼ï¤Î UNIX ¤Ç¹­¤¯ºÎÍѤµ¤ì¤Æ¤¤¤ë ELF ¥Õ¥©¡¼¥Þ¥Ã¥È¡£linux ¤ä xBSD + ¥Õ¥¡¥ß¥ê¤Ê¤É¡¢¸½ºß¤Î UNIX ¤ÎÂçÉôʬ¤¬³ºÅö¡£ + + ¡¦nasm -faoutb -DAOUT ... + + °ÊÁ°¤Î xBSD ¥Õ¥¡¥ß¥ê¤Ç»È¤ï¤ì¤Æ¤¤¤¿ a.out ¥Õ¥©¡¼¥Þ¥Ã¥È¡£ + + ¡¦nasm -fmacho -DMACHO ... + + Darwin (MacOS X) ¤Ê¤É¤ÇºÎÍѤµ¤ì¤Æ¤¤¤ë Mach-O ¥Õ¥©¡¼¥Þ¥Ã¥È¡£ + Ãí¡Ë-fmacho ¥ª¥×¥·¥ç¥ó¤Ï nasm 0.98.40 °Ê¹ß¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤Þ¤¹¡£ + + ¡¦nasm -fcoff -DDJGPP ... + + MS-DOS ¤Î DJGPP ¥³¥ó¥Ñ¥¤¥é¤Ç»È¤ï¤ì¤ë coff ¥Õ¥©¡¼¥Þ¥Ã¥È¡£ + + ¤³¤Î¤¦¤Á¡¢-f ¥ª¥×¥·¥ç¥ó¤Ï nasm ¤¬²ò¼á¤¹¤ë¥Õ¥¡¥¤¥ë¥Õ¥©¡¼¥Þ¥Ã¥È¤Î»ØÄê»Ò¤Ç¡¢ + -D ¥ª¥×¥·¥ç¥ó(¥Þ¥¯¥í¤ÎÄêµÁ)¤Ï jsimdext.inc ¤ÎÃæ¤Ç²ò¼á¤µ¤ì¤ë¥Ð¥¤¥Ê¥êµ¬Ìó + (ABI)¤Î»ØÄê»Ò¤Ç¤¹¡£jsimdext.inc ¤Ç¤Ï¡¢-D ¥ª¥×¥·¥ç¥ó¤Ç¤Î¥Þ¥¯¥íÄêµÁ¤Ë½¾¤Ã¤Æ¡¢ + ¥»¥°¥á¥ó¥È(¥»¥¯¥·¥ç¥ó)¤ÎÄêµÁ¤ä³°Éô̾̾Á°Áõ¾þ¤ÎÄêµÁ¤ò¹Ô¤Ê¤Ã¤Æ¤¤¤Þ¤¹¡£ + ¾Ü¤·¤¯¤Ï jsimdext.inc ¤ò¤´Í÷¤¯¤À¤µ¤¤¡£ + + ELF ·Á¼° ¤ª¤è¤Ó a.out ·Á¼° ¤Î¾ì¹ç¡¢-DPIC ¤òÄɲ䷤ƻØÄꤹ¤ë¤È¥³¡¼¥É¤¬ + Position Independent Code (°ÌÃÖÆÈΩ¥³¡¼¥É) ¤Ë¤Ê¤ê¤Þ¤¹¡£-DPIC ¤Ï + jsimdext.inc ¤ÎÃæ¤Ç²ò¼á¤µ¤ì¡¢¥³¡¼¥É¤ò PIC ¤Ë¤¹¤ë¤¿¤á¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤Þ¤¹¡£ + Mach-O ·Á¼°¤Î¾ì¹ç¤Ï¡¢¥³¡¼¥É¤Ï¾ï¤Ë PIC ¤Ç¤¢¤ëɬÍפ¬¤¢¤ë¤¿¤á¡¢-DPIC ¤ò + »ØÄꤷ¤Ê¤¯¤Æ¤â¾ï¤Ë PIC ·Á¼°¤Î¥³¡¼¥É¤òÀ¸À®¤·¤Þ¤¹¡£ + + ¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë±þ¤¸¤Æ¡¢¤³¤ì¤é¤ÎÃæ¤«¤éŬÀڤʤâ¤Î¤òÁªÂò¤¹¤ëɬÍפ¬¤¢¤ê + ¤Þ¤¹¡£Æ±º­¤Î makefile ¤Ç¤Ï¡¢¤¢¤é¤«¤¸¤áŬÀڤʤâ¤Î¤¬»ØÄꤵ¤ì¤Æ¤¤¤Þ¤¹¡£ + configure ¥¹¥¯¥ê¥×¥È¤Ç¤Ï¡¢config.guess ¤¬½ÐÎϤ¹¤ë¥Û¥¹¥È¾ðÊó¤ò¸µ¤ËÁªÂò + ¤·¤Æ¤¤¤Þ¤¹¡£ + + +¢£OS ¤Î SIMD ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯ (jsimdgcc.c / jsimddjg.asm / jsimdw32.asm) + + 
SIMD Ì¿Îá¤ò¼Â¹Ô¤¹¤ë¤Ë¤Ï¡¢»öÁ°¤Î CPU ¤Î¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤À¤±¤Ç¤Ï¤Ê¤¯¡¢ + OS ¤Î¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤âɬÍפǤ¹¡£ÆÃ¤Ë SSE/SSE2 ¤Ë¤Ä¤¤¤Æ¤Ï¡¢OS ¦¤Ç + SSE/SSE2 Ì¿Îá¤ò¼Â¹Ô¤Ç¤­¤ë¤è¤¦¤Ë»öÁ°¤ËCPU¤òÀßÄꤹ¤ëɬÍפ¬¤¢¤ê¡¢¤½¤ì¤ò + ¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤ OS ¤Ç¤Ï¡¢OS ¤¬¥·¥ó¥°¥ë¥¿¥¹¥¯¤«¥Þ¥ë¥Á¥¿¥¹¥¯¤«¤Ë´Ø¤ï¤é¤º¡¢ + SSE/SSE2 ¤Ï¼Â¹Ô¤Ç¤­¤Þ¤»¤ó¡£¤µ¤é¤Ë¡¢¤¢¤Þ¤êÃΤé¤ì¤Æ¤¤¤Þ¤»¤ó¤¬¡¢CPU ¤Ë + Æâ¢¤µ¤ì¤¿ FPU (¿ôÃͱ黻¥×¥í¥»¥Ã¥µ) ¤ò»ÈÍѤ·¤Ê¤¤(¥¨¥ß¥å¥ì¡¼¥È¤¹¤ë)ÀßÄê + ¤Ë¤Ê¤Ã¤Æ¤¤¤ë¤È¡¢MMX ¤ä 3DNow! ¤â´Þ¤á¤¹¤Ù¤Æ¤Î SIMD Ì¿Îá¤ÏÁ´¤¯¼Â¹Ô¤Ç¤­ + ¤Þ¤»¤ó¡£Íפ¹¤ë¤Ë¡¢SIMD Ì¿Îá¤ò¼Â¹Ô¤Ç¤­¤ë¤«¤É¤¦¤«¤òÄ´¤Ù¤ë¤Ë¤Ï¡¢CPUID ¤Î + ¥Õ¥é¥°¤òÄ´¤Ù¤ë¤À¤±¤Ç¤ÏÉÔ½½Ê¬¤È¤¤¤¦¤³¤È¤Ç¤¹¡£ + + SIMD Ì¿Îá¤Î OS ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤È¤¤¤Ã¤Æ¤â¡¢Êݸî¥â¡¼¥É¤Çư¤¤¤Æ¤¤¤ë + ¥×¥í¥°¥é¥à¤Î¾ì¹ç¡¢CPU ¤ÎÀßÄê¥Õ¥é¥°¤Î¾õÂÖ¤òľÀܥ桼¥¶¡¦¥×¥í¥°¥é¥à¤«¤é + ÆÉ¤ß¤È¤ë¤³¤È¤¬¤Ç¤­¤Ê¤¤¤¿¤á¡¢SIMD Ì¿Îá¤ò»î¤·¤Ë¼Â¹Ô¤·¤Æ¤ß¤ÆÌµ¸úÌ¿ÎáÎã³°¤¬ + ȯÀ¸¤¹¤ë¤«¤É¤¦¤«¤ò³Î¤«¤á¤ë¤È¤¤¤¦¡¢°Ü¿¢À­¤Î°­¤¤´ÖÀÜŪ¤ÊÊýË¡¤òºÎ¤é¤¶¤ë¤ò + ÆÀ¤Ê¤¤¤Î¤¬¸½¾õ¤Ç¤¹¡£ + + ¤³¤Î SIMD Ì¿Îá¤Î OS ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤ò¤ä¤Ã¤Æ¤¤¤ë¤Î¤¬¡¢jsimdgcc.c / + jsimddjg.asm / jsimdw32.asm ¤Î£³¤Ä¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤Ç¤¹¡£¤½¤ì¤¾¤ì¡¢ + UNIX/gccÍÑ¡¢DJGPPÍÑ¡¢Win32ÍѤǤ¹¡£jsimdgcc.c ¤Ç¤Ï¡¢Îã³°¤ÎȯÀ¸¤ò + signal() ´Ø¿ô¤Î¥·¥°¥Ê¥ë¥Ï¥ó¥É¥é¤ÇÊá¤Þ¤¨¤Æ¤¤¤Þ¤¹¡£¥³¡¼¥É¤Î°ìÉô¤Ë gcc ¤Î + ¥¤¥ó¥é¥¤¥ó¥¢¥»¥ó¥Ö¥é¤ò»È¤Ã¤Æ¤¤¤ë¤¿¤á¡¢gcc ÀìÍѤǤ¹¡£gcc °Ê³°¤Ç¤â + ¥³¥ó¥Ñ¥¤¥ë¤Ï¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤Î¾ì¹ç¤Ï SIMD Ì¿Îá¤Î¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤Ï + ¹Ô¤Ê¤ï¤ì¤Þ¤»¤ó¡£jsimddjg.asm ¤Ï DPMI ¤ÎÎã³°½èÍýµ¡¹½¤òľÀÜÍøÍѤ·¤¿ + ÊýË¡¤Ç¡¢jsimdw32.asm ¤Ï Win32 ¤ÎÎã³°½èÍýµ¡¹½¤òľÀÜÍøÍѤ·¤¿ÊýË¡¤Ç¤¹¡£ + + ¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë±þ¤¸¤Æ¡¢¤³¤Î£³¼ïÎà¤ÎÃæ¤«¤éŬÀڤʤâ¤Î¤òÁªÂò¤¹¤ëɬÍפ¬ + ¤¢¤ê¤Þ¤¹¡£Æ±º­¤Î makefile ¤Ç¤Ï¡¢¤¢¤é¤«¤¸¤áŬÀڤʤâ¤Î¤¬»ØÄꤵ¤ì¤Æ¤¤¤Þ¤¹¡£ + configure ¥¹¥¯¥ê¥×¥È¤Ç¤Ï¡¢config.guess ¤¬½ÐÎϤ¹¤ë¥Û¥¹¥È¾ðÊó¤ò¸µ¤ËÁªÂò + ¤·¤Æ¤¤¤Þ¤¹¡£ + + ¤³¤Î£³¼ïÎà¤Î¤É¤ì¤È¤âŬ¹ç¤·¤Ê¤¤¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Î¾ì¹ç¤Ï¡¢¿·¤¿¤Ê¥Á¥§¥Ã¥¯ + ´Ø¿ô¤ò½ñ¤¯É¬Íפ¬¤¢¤ê¤Þ¤¹¡£¤Ç¤¹¤¬¡¢¤½¤Î OS ¤¬Á´¤Æ¤Î SIMD Ì¿Îá¤ò¥µ¥Ý¡¼¥È + ¤·¤Æ¤¤¤ë OS ¤Ç¤¢¤ë¤³¤È¤¬¤ï¤«¤Ã¤Æ¤¤¤ë¾ì¹ç¤Ï¡¢¤³¤Î OS ¥µ¥Ý¡¼¥È¥Á¥§¥Ã¥¯¤Ï + ¾Êά¤¹¤ë¤³¤È¤â²Äǽ¤Ç¤¹¡£°Ê²¼¤Î¤è¤¦¤Ê¶õ¤Î´Ø¿ô¤Î¤ß¤Î¥½¡¼¥¹¥Õ¥¡¥¤¥ë¤ò + 
ºî¤Ã¤Æ¾åµ­¤Î£³¤Ä¤Î¥Õ¥¡¥¤¥ë¤ÎÂå¤ï¤ê¤Ë»ÈÍѤ¹¤ë¤«¡¢¤â¤·¤¯¤Ï jcomapi.c ¤ò + ²þÊѤ·¤Æ jpeg_simd_os_support ¤Î¸Æ¤Ó½Ð¤·¤ò¥Ð¥¤¥Ñ¥¹¤¹¤ë¤è¤¦¤Ë¤¹¤ì¤Ð£Ï£Ë + ¤Ç¤¹¡£ + + GLOBAL(unsigned int) + jpeg_simd_os_support (unsigned int simd) + { + return simd; + } + + +¢£¥¢¥»¥ó¥Ö¥ê¸À¸ìÍÑÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc ¤ÎºîÀ® (makecfg.c) + + Åö¥½¥Õ¥È¤Î¾ì¹ç¡¢¥¢¥»¥ó¥Ö¥ê¸À¸ì¤Î¥½¡¼¥¹¥³¡¼¥ÉÃæ¤«¤é£Ã¸À¸ì¤Î¥Ø¥Ã¥À¥Õ¥¡¥¤¥ë + ¤Ë¤¢¤ë¾ðÊ󡢤¿¤È¤¨¤Ð¡¢¥×¥ê¥×¥í¥»¥Ã¥µ¥Þ¥¯¥í¤ÎÃͤ乽¤ÂΤÎÃæ¤Ë¤¢¤ëÊÑ¿ô¤Î + ¥ª¥Õ¥»¥Ã¥È¤Ê¤É¡¢¤òÃΤëɬÍפ¬¤¢¤ê¤Þ¤¹¡£makecfg.c ¤Ï¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤Î + ¥³¥ó¥Ñ¥¤¥ë¤ËÀèΩ¤Ã¤Æ¥³¥ó¥Ñ¥¤¥ë¡¦¥ê¥ó¥¯¡¦¼Â¹Ô¤µ¤ì¡¢¥¢¥»¥ó¥Ö¥ê¸À¸ì¦¤Î + ¥½¡¼¥¹¥³¡¼¥É¤ÇɬÍפȤʤë¾ðÊó¤ò¥¢¥»¥ó¥Ö¥ê¸À¸ìÍÑÀßÄê¥Õ¥¡¥¤¥ë jsimdcfg.inc + ¤È¤·¤Æ½ÐÎϤ¹¤ëƯ¤­¤ò¤·¤Æ¤¤¤Þ¤¹¡£ + + Ãí°ÕÅÀ¤È¤·¤Æ¡¢makecfg.c ¤Ï JPEG ¥é¥¤¥Ö¥é¥ê¤Î¥½¡¼¥¹¥³¡¼¥É¤ÈƱ¤¸¥³¥ó¥Ñ¥¤¥ë + ¥ª¥×¥·¥ç¥ó¤Ç¥³¥ó¥Ñ¥¤¥ë¤µ¤ì¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£ÆÃ¤Ë¡¢¹½Â¤ÂΤβò¼á(¥µ¥¤¥º + ¤Ê¤É)¤¬ JPEG ¥é¥¤¥Ö¥é¥êËÜÂΤΤâ¤Î¤È°Û¤Ê¤Ã¤Æ¤·¤Þ¤¦¤È¡¢JPEG ¥é¥¤¥Ö¥é¥ê¤¬ + ¥¯¥é¥Ã¥·¥å¤·¤Þ¤¹¡£ + + +¢£SIMD Ì¿Îá¤Î¼Â¹Ô»þ¤ÎÁªÂò¡¿SIMD ưºî¥â¡¼¥É¾ðÊó + + ¤³¤Î SIMD ³ÈÄ¥ÈÇ JPEG ¥é¥¤¥Ö¥é¥ê¤Ç¤Ï¡¢¥×¥í¥°¥é¥à¤Î¼Â¹Ô»þ¤ËÆÃÄê¤Î SIMD + Ì¿Îá¤ò»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë¤·¤¿¤ê¡¢³Æ½èÍýÃʳ¬¤Ç¤É¤Î SIMD Ì¿Îá¤ò»ÈÍѤ·¤ÆÆ°ºî + ¤¹¤ë¤Î¤«¤òÇİ®¤Ç¤­¤ë»ÅÁȤߤ¬ÍѰդµ¤ì¤Æ¤¤¤Þ¤¹¡£ + + jpeg_simd_mask() ¤ò»È¤¦¤È¡¢ÆÃÄê¤Î SIMD Ì¿Îá¤ò¼Â¹Ô»þ¤Ë»ÈÍѤ·¤Ê¤¤¤è¤¦¤Ë + ¤Ç¤­¤Þ¤¹¡£ + + GLOBAL(unsigned) + jpeg_simd_mask (j_common_ptr cinfo, unsigned remove, unsigned add); + + ¤³¤Î´Ø¿ô¤Ï¡¢³Æ SIMD Ì¿Î᥻¥Ã¥È¤ËÂбþ¤¹¤ë¥Þ¥¹¥¯¥Ó¥Ã¥È¤òÀßÄꡦÊѹ¹¤·¤Þ¤¹¡£ + remove, add ¤½¤·¤ÆÌá¤êÃͤϡ¢³Æ SIMD Ì¿Îá¤ËÂбþ¤¹¤ë¥Ó¥Ã¥ÈÃÍ (JSIMD_MMX, + JSIMD_3DNOW, JSIMD_SSE, JSIMD_SSE2) ¤ò OR ±é»»¤ÇÁȤ߹ç¤ï¤»¤¿¤â¤Î¤Ç¤¹¡£ + + "¥Þ¥¹¥¯¥Ó¥Ã¥È" ¤Ï¡¢¹½Â¤ÂÎ cinfo ¤ËÊÝ»ý¤µ¤ì¤ëÃͤǡ¢¤½¤Î½é´üÃÍ¤Ï 0 ¤Ç¤¹¡£ + ¤½¤·¤Æ¤³¤Î´Ø¿ô¤Ï¡¢¤³¤Î¥Þ¥¹¥¯¥Ó¥Ã¥È¤ò°Ê²¼¤Î¤è¤¦¤Ë¹¹¿·(Áàºî)¤·¤Þ¤¹¡£ + + (¿·¤·¤¤¥Þ¥¹¥¯¥Ó¥Ã¥È) = ((¸Å¤¤¥Þ¥¹¥¯¥Ó¥Ã¥È) & ~remove) | add; + + ¤½¤·¤Æ¡¢¤³¤Î¥Þ¥¹¥¯¥Ó¥Ã¥È¤¬ 1 ¤Ë¤µ¤ì¤¿ SIMD Ì¿Î᥻¥Ã¥È¤Ï¡¢¤¿¤È¤¨ CPU/OS + ¤ÇÂбþ¤·¤Æ¤¤¤Æ¤â»ÈÍѤµ¤ì¤Þ¤»¤ó¡£¤³¤Î´Ø¿ô¤Ï¡¢¤³¤Î´Ø¿ô¤ò¸Æ¤ÖľÁ°¤Þ¤ÇÀßÄê + ¤µ¤ì¤Æ¤¤¤¿¥Þ¥¹¥¯¥Ó¥Ã¥È¤òÊÖ¤·¤Þ¤¹¡£¤Ê¤Î¤Ç¡¢remove, add ¶¦¤Ë 0 ¤òÍ¿¤¨¤Æ + 
´Ø¿ô¤ò¸Æ¤Ù¤Ð¡¢¸½ºßÀßÄꤵ¤ì¤Æ¤¤¤ë¥Þ¥¹¥¯¥Ó¥Ã¥È¤ò¼èÆÀ¤Ç¤­¤Þ¤¹¡£¤³¤Î´Ø¿ô¤Î + »ÈÍÑÎã¤Ï¡¢cjpeg.c, djpeg.c, jcomapi.c ¤Ë¤¢¤ê¤Þ¤¹¡£ + + ¤³¤Î¥Þ¥¹¥¯¥Ó¥Ã¥È¤ÎÃͤϡ¢¼ÂºÝ¤Ë¤Ï¹½Â¤ÂÎ cinfo ¤Î output_gamma ¤â¤·¤¯¤Ï + input_gamma ÊÑ¿ô¤Î²¼°Ì¥Ó¥Ã¥È¤ÎÊݸ¤µ¤ì¤Æ¤¤¤Þ¤¹(¾Ü¤·¤¯¤Ï jcomapi.c ¤ò + »²¾È)¡£¤³¤ì¤Ï¡¢¹½Â¤ÂÎ cinfo ¤Ë¿·¤¿¤ÊÊÑ¿ô¤òÄɲ䷤Ƥ·¤Þ¤¦¤È¥Ð¥¤¥Ê¥ê¸ß´¹ + ¤¬Êø¤ì¤Æ¤·¤Þ¤¦¤¿¤á¤Ç¡¢¸½¾õ¤Ç¤Ï̤»ÈÍѤȻפï¤ì¤ë¾åµ­¤ÎÊÑ¿ô¤ò¡Ö´Ö¼Ú¤ê¡× + ¤·¤Æ¤¤¤Þ¤¹¡£ + + ¤Þ¤¿¡¢°Ê²¼¤Î´Ø¿ô·²¤ò»È¤¦¤È¡¢¥é¥¤¥Ö¥é¥êÆâÉô¤Î³Æ½èÍýÃʳ¬¤Ç¤É¤Î SIMD Ì¿Îá + ¤ò»ÈÍѤ·¤ÆÆ°ºî¤¹¤ë¤Î¤«¤òÇİ®¤Ç¤­¤Þ¤¹¡£ + + jpeg_simd_color_converter(); -> ¿§¶õ´ÖÊÑ´¹(RGB->YCbCr) + jpeg_simd_downsampler(); -> ¥À¥¦¥ó¥µ¥ó¥×¥ê¥ó¥° + jpeg_simd_forward_dct(); -> DCT½çÊÑ´¹ + jpeg_simd_color_deconverter(); -> ¿§¶õ´ÖÊÑ´¹(YCbCr->RGB) + jpeg_simd_upsampler(); -> ¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥° + jpeg_simd_inverse_dct(); -> DCTµÕÊÑ´¹ + + ÊÖ¤¹Ãͤϡ¢ÉâÆ°¾®¿ôÅÀDCT½çÊÑ´¹/µÕÊÑ´¹ ¤Î¾ì¹ç¤Ï JSIMD_3DNOW ¤« JSIMD_SSE¡¢ + ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ï JSIMD_MMX ¤« JSIMD_SSE2 ¤òÊÖ¤·¤Þ¤¹¡£¤Þ¤¿¡¢0 ¤¬ÊÖ¤Ã¤Æ + ¤­¤¿¾ì¹ç¤Ï SIMD Ì¿Î᥻¥Ã¥È¤Ï»È¤ï¤ì¤º¡¢½¾Íè¤Î¥ë¡¼¥Á¥ó¤¬»È¤ï¤ì¤ë¤³¤È¤ò + °ÕÌ£¤·¤Þ¤¹¡£ + + ¤³¤ì¤é¤Î´Ø¿ô¤Î¾Ü¤·¤¤»È¤¤Êý¤Ë¤Ä¤¤¤Æ¤Ï¡¢cjpeg.c, djpeg.c (»ÈÍÑÎã) ¤ò¤´Í÷ + ¤¯¤À¤µ¤¤¡£ + + ¤Ê¤ª¡¢¤³¤ì¤é¤Î SIMD ¥Þ¥¹¥¯´Ø¿ô¡¿SIMD ¥â¡¼¥É¾ðÊó´Ø¿ô ¤¬É¬Íפʤ¤¾ì¹ç¤Ï¡¢ + °Ê²¼¤Î¥Þ¥¯¥í¤ò jconfig.h ¤Ê¤É¤Ë´Þ¤á¤ë¤³¤È¤Ç¡¢¶Ï¤«¤Ç¤¹¤¬¥³¡¼¥É¥µ¥¤¥º¤¬ + ÀáÌó¤Ç¤­¤Þ¤¹¡£ + + #define JSIMD_MASKFUNC_NOT_SUPPORTED + #define JSIMD_MODEINFO_NOT_SUPPORTED + + +¢£¤½¤Î¤Û¤«¤ÎÀßÄê¹àÌÜ¥Þ¥¯¥í + + ¡û RGB_RED / RGB_GREEN / RGB_BLUE / RGB_PIXELSIZE + + ¤³¤ì¤Ï¡¢jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¡¢¼è¤ê°·¤¦ RGB ·Á¼°²èÁü + ¥Ç¡¼¥¿¤Î RGB ¤Îʤӽç¤ä¥Ô¥¯¥»¥ë¥µ¥¤¥º¤òÀßÄꤷ¤Þ¤¹¡£¤³¤Î SIMD ³ÈÄ¥ÈÇ + ¤Ç¤âÊѹ¹¤Ç¤­¤ë¤è¤¦¤Ë¤·¤Æ¤¢¤ê¤Þ¤¹¤¬¡¢RGB_PIXELSIZE ¤¬ 3 ¤« 4 ¤Î¾ì¹ç¤Î¤ß¡¢ + SIMD ÈǤο§¶õ´ÖÊÑ´¹¥ë¡¼¥Á¥ó¤¬Í­¸ú¤Ë¤Ê¤ê¤Þ¤¹¡£¤½¤ì°Ê³°¤ÎÃͤˤ·¤¿¾ì¹ç¤Ï¡¢ + SIMD ÈǤο§¶õ´ÖÊÑ´¹¥ë¡¼¥Á¥ó¤Ï¼«Æ°Åª¤Ë̵¸ú²½¤µ¤ì¤Æ¡¢½¾Íè¤Î¿§¶õ´ÖÊÑ´¹ + ¥ë¡¼¥Á¥ó¤¬»È¤ï¤ì¤Þ¤¹(¤ä¤äÄ㮤ˤʤê¤Þ¤¹)¡£ + + ¤³¤ì¤é¤ÎÃͤòÊѹ¹¤¹¤ë¤³¤È¤Ç¡¢½ÐÎϤò 32bit/pixel ·Á¼°¤Ë¤·¤¿¤ê¡¢BMP ·Á¼°¤Ë + ¹ç¤ï¤»¤Æ¥Ô¥¯¥»¥ë¤ò BGR ½ç¤Ë¤·¤¿¤ê¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ê¤Þ¤¹¡£¤Ê¤ª¡¢ + 
¤³¤ì¤é¤ÎÃͤòÌ·½â¤¹¤ëÃÍ¤Ë #define ¤·¤¿¾ì¹ç¤Ï¥³¥ó¥Ñ¥¤¥ë¤Ç¤­¤Ê¤¤¤è¤¦¤Ë¤·¤Æ + ¤¢¤ê¤Þ¤¹¤Î¤Ç¡¢Ãí°Õ¤·¤Æ¤¯¤À¤µ¤¤¡£ + + ¡û RGBX_FILLER_0XFF + + ¤³¤ì¤â jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¤¹¡£¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï #undef + ¤Ë¤Ê¤Ã¤Æ¤¤¤Þ¤¹¡£¾å¤Î RGB_PIXELSIZE ¤ò 4 ¤Ë¤·¤¿¾ì¹ç¡¢£±¤Ä¤Î¥Ô¥¯¥»¥ë + ¥Ç¡¼¥¿Ãæ¤Ë(RGB¤Î£³¥Ð¥¤¥È¤Î¾¤Ë);·×¤Ê£±¥Ð¥¤¥È¤¬Â¸ºß¤¹¤ë¤³¤È¤Ë¤Ê¤ê¤Þ¤¹¡£ + ¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï¡¢¤³¤Î;·×¤Ê£±¥Ð¥¤¥È(filler byte)¤Ë¤Ï 0x00 ¤¬Ëä¤á¤é¤ì¤Æ + ½ÐÎϤµ¤ì¤Þ¤¹¤¬¡¢¤³¤Î RGBX_FILLER_0XFF ¤ò #define ¤¹¤ë¤È 0x00 ¤ÎÂå¤ï¤ê + ¤Ë 0xFF ¤¬ filler byte ¤ËËä¤á¤é¤ì¤Æ½ÐÎϤµ¤ì¤Þ¤¹¡£ + + ½ÐÎϤò 32bit/pixel ·Á¼°¤Ë¤·¤¿¾ì¹ç¤Ç¡¢filler byte ¤ò¥¢¥ë¥Õ¥¡¥Á¥ã¥Í¥ë + ¤È¤·¤Æ°·¤¤¤¿¤¤¾ì¹ç¤Ê¤É¤Ï¡¢RGBX_FILLER_0XFF ¤ò #define ¤¹¤ë¤ÈÅԹ礬Îɤ¤ + ¾ì¹ç¤¬¤¢¤ë¤Ç¤·¤ç¤¦¡£ + + ¤Ê¤ª¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠJPEG ¥é¥¤¥Ö¥é¥ê¤Ç¤Ï¡¢¤³¤Î filler byte ¤Ë¤Ï²¿¤â + µÍ¤á¤é¤ì¤º¡¢¸µ¤ÎÃͤ¬¤½¤Î¤Þ¤ÞÊÝ»ý¤µ¤ì¤Þ¤¹¡£¤Ç¤¹¤¬¡¢SIMD ÈǤο§¶õ´ÖÊÑ´¹ + ¥ë¡¼¥Á¥ó¤Ç¤Ï¸µ¤ÎÃͤòÊÝ»ý¤¹¤ë¤Ë¤Ï¼ê´Ö¤¬¤«¤«¤ë¤¿¤á¡¢¾ï¤Ë 0x00 ¤« 0xFF + ¤ÇËä¤á¤Æ½ÐÎϤ¹¤ë¤è¤¦¤Ë»ÅÍÍÊѹ¹¤ò¹Ô¤Ê¤¤¤Þ¤·¤¿¡£ + + ¡û JFDCT_INT_QUANTIZE_WITH_DIVISION + + ¤³¤ì¤Ï¡¢jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ëÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¡¢°µ½Ì½èÍý¤Ç¤Î DCT·¸¿ô¤Î + Î̻Ҳ½½èÍý¤ÎÊýË¡¤òÊѹ¹¤·¤Þ¤¹¡£¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï #undef ¤Ç¡¢#undef ¤Î¾õÂ֤Π+ Êý¤¬¹â®¤Ê¤Î¤Ç¡¢ÆÃ¤ËÍýͳ¤Î¤Ê¤¤¸Â¤ê #undef ¤Ç»ÈÍѤ¹¤ë¤³¤È¤ò¤ªÁ¦¤á¤¤¤¿¤· + ¤Þ¤¹¡£ + + DCT·¸¿ô¤ÎÎ̻Ҳ½½èÍý¤È¤¤¤¦¤Î¤Ï¡¢Ã¼Åª¤Ë¸À¤¨¤Ð²èÁü¥Ç¡¼¥¿¤ËÂФ·¤Æ°ì¤Ä°ì¤Ä + ³ä¤ê»»(À°¿ô½ü»»)¤ò¼Â¹Ô¤¹¤ë¤³¤È¤Ç¤¹¡£¤Ç¤¹¤¬¡¢½ü»»¤Ï¸¶ÍýŪ¤Ë¹â®²½¤¬ + ÉÔ²Äǽ¤Ê¤Î¤Ç¡¢¤³¤Î SIMD ³ÈÄ¥ÈǤǤÏÀ°¿ô½ü»»¤ÎÂå¤ï¤ê¤ËÀ°¿ô¾è»»¤ò»ÈÍѤ·¤Æ + Î̻Ҳ½½èÍý¤ò¼Â¹Ô¤·¤Æ¤¤¤Þ¤¹¡£ + + ¤³¤ÎÀ°¿ô¾è»»¤òÂåÍѤ¹¤ëÊýË¡¤Ç¤â¡¢¹âÀºÅÙÀ°¿ôDCT/¹â®À°¿ôDCT¤ò»È¤Ã¤¿¾ì¹ç¤Ç¡¢ + ¤«¤Ä¡¢0¡Á100 ¤Î¤¹¤Ù¤Æ¤Î°µ½Ì¥¯¥ª¥ê¥Æ¥£ÀßÄê¤Ç¥ª¥ê¥¸¥Ê¥ëÈǤÈÁ´¤¯Æ±¤¸·ë²Ì¤ò + ½Ð¤¹¤³¤È¤ò³Îǧ¤·¤Æ¤¤¤Þ¤¹¡£¤Ç¤¹¤¬¡¢°µ½Ì²è¼Á¤ò "¥¯¥ª¥ê¥Æ¥£" ¤Î»ØÉ¸¤ÇÀßÄê + ¤»¤º¡¢¥¯¥ª¥ê¥Æ¥£ 0 ¤è¤ê¤âÄã²è¼Á¤ÎÎ̻Ҳ½¥Æ¡¼¥Ö¥ë¤òľÀÜÍ¿¤¨¤Æ°µ½Ì¤·¤¿¾ì¹ç + ¤Ê¤É¤Ï¡¢±é»»ÅÓÃæ¤Î¿ôÃÍÈϰϤδط¸¤Ç¡¢¥ª¥ê¥¸¥Ê¥ëÈǤȤϰۤʤë·ë²Ì¤¬½Ð¤ë¤³¤È + ¤âÈÝÄê¤Ç¤­¤Þ¤»¤ó¡£¤½¤¦¤¤¤Ã¤¿ÆÃ¼ì¤Ê¶­³¦¾ò·ï²¼¤Ç¤â¥ª¥ê¥¸¥Ê¥ëÈǤȤθߴ¹ÅÙ¤¬ + ¹â¤¯¤Ê¤é¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¾ì¹ç¤Ê¤É¤Ë¤Ï¡¢¤³¤Î¹àÌܤò #define ¤Ë¤·¤Æ»ÈÍѤ·¤Æ + 
¤¯¤À¤µ¤¤¡£¼ã´³Â®ÅÙ¤ÏÍî¤Á¤Þ¤¹¤¬¡¢½¾Íè¤É¤ª¤ê¡¢°ì¤Ä°ì¤Ä½ü»»¤ò¹Ô¤Ã¤ÆÎ̻Ҳ½ + ½èÍý¤ò¹Ô¤¤¤Þ¤¹¡£ + + ¤â¤Ã¤È¤â¡¢¥¯¥ª¥ê¥Æ¥£ 0 ¤è¤ê¤âÄã²è¼Á¤ÎÀßÄê¤Ç°µ½Ì¤·¤Æ¤â¡¢¤Û¤È¤ó¤É¼ÂÍÑ¤Ë + ¤Ê¤ê¤Þ¤»¤ó¤Î¤Ç¡¢¤³¤ÎÀ°¿ô¾è»»¤òÂåÍѤ¹¤ëÊýË¡¤Ç¤â¡¢ÌäÂê¤Ë¤Ê¤ë¤³¤È¤Ï¤Ê¤¤¤È + »×¤¤¤Þ¤¹¡£ + + ¡û UPSAMPLE_H1V2_SUPPORTED + + ¤³¤ì¤Ï¡¢jmorecfg.h ¤ÎÃæ¤Ë¤¢¤ë¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG library ¤Ë¤Ï + ¸ºß¤·¤Ê¤¤ÀßÄê¹àÌÜ¥Þ¥¯¥í¤Ç¤¹¡£¤³¤ì¤Ï¡¢Y:1x2 Cb:1x1 Cr:1x1 (4:2:2) ¤Î + ¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ¤ò»ý¤Ä JPEG ¥Õ¥¡¥¤¥ë¤ò¡¢¥ª¥ê¥¸¥Ê¥ëÈǤΠIJG JPEG + library ¤è¤ê¤â¹â®¡¿¹â²è¼Á¤ËŸ³«¤Ç¤­¤ë¤è¤¦¤Ë¤¹¤ë¤â¤Î¤Ç¤¹¡£ + + ¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:1x2 Cb:1x1 Cr:1x1 (4:2:2) ¤ò»ý¤Ä JPEG ¥Õ¥¡¥¤¥ë¤Ï¡¢ + ¥ª¥ê¥¸¥Ê¥ë¤Î IJG JPEG Library ¤Ç¤âŸ³«¤Ç¤­¤Þ¤¹¤¬¡¢¤³¤Î¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥° + Èæ¤ËÂбþ¤¹¤ë¥¢¥Ã¥×¥µ¥ó¥×¥ê¥ó¥°¡¦¥ë¡¼¥Á¥ó¤¬´Êñ¤Ê¤â¤Î¤·¤«ÍѰդµ¤ì¤Æ + ¤¤¤Ê¤¤¤¿¤á¡¢Å¸³«Â®ÅÙ¤âÃÙ¤¯¡¢¤Þ¤¿¡¢¿§¤Î¶­Ìܤ¬¤Ï¤Ã¤­¤ê¤·¤Æ¤¤¤ë£Ã£Ç²èÁü + ¤Ê¤É¤Î¾ì¹ç¤Ï¥¸¥ã¥®¡¼¤¬ÌÜΩ¤Ã¤Æ¤·¤Þ¤¦¤³¤È¤¬¤¢¤ê¤Þ¤¹¡£¤³¤Î¹àÌܤòÍ­¸ú¤Ë + ¤¹¤ë¤³¤È¤Ç¡¢¤³¤Î¤è¤¦¤Ê¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:1x2 Cb:1x1 Cr:1x1 ¤ò»ý¤Ä + JPEG ¥Õ¥¡¥¤¥ë¤ò¹â®¤Ë¡¢¤Þ¤¿¡¢¥¸¥ã¥®¡¼¤¬ÌÜΩ¤¿¤Ê¤¤¤è¤¦¤Ë¹â²è¼Á¤ËŸ³« + ¤Ç¤­¤ë¤è¤¦¤Ë¤·¤Þ¤¹¡£ + + ¤³¤Î¡¢¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:1x2 Cb:1x1 Cr:1x1 ¤Î JPEG ¥Õ¥¡¥¤¥ë¤Ï¡¢¤¢¤Þ¤ê + °ìÈÌŪ¤Ê¤â¤Î¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¤¬¡¢¥Ç¥£¥¸¥¿¥ë¥«¥á¥é¤Ê¤É¤¬½ÐÎϤ¹¤ë¤³¤È¤Î¿¤¤¡¢ + ¥µ¥Ö¥µ¥ó¥×¥ê¥ó¥°Èæ Y:2x1 Cb:1x1 Cr:1x1 (4:2:2) ¤Î JPEG ¥Õ¥¡¥¤¥ë¤ËÂФ·¤Æ + ¡ÖJPEG ¥í¥¹¥ì¥¹²óž¡×½èÍý¤ò¹Ô¤Ê¤¦¤È¡¢¤³¤Î Y:1x2 Cb:1x1 Cr:1x1 ¤Î JPEG + ¥Õ¥¡¥¤¥ë¤Ë¤Ê¤ê¤Þ¤¹¡£¥Ç¥£¥¸¥¿¥ë¥«¥á¥é¤Ç¡¢¥«¥á¥é¤ò½Ä¤Ë¤·¤Æ¡Ê½Ä°ÌÃ֤ǡ˻£±Æ + ¤·¤¿²èÁü¤ò¡ÖJPEG ¥í¥¹¥ì¥¹²óž¡×¤·¤ÆÀµ¾ï¤Ê¸þ¤­¤Ëľ¤¹¡¢¤Ê¤É¤È¤¤¤¦¤³¤È¤Ï¡¢ + ¤è¤¯¤ä¤ë¤³¤È¤À¤È»×¤¤¤Þ¤¹¡£¤Ç¤¹¤¬¡¢¤³¤Î¤è¤¦¤Ê¡ÖJPEG ¥í¥¹¥ì¥¹²óž¡×¤µ¤ì¤¿ + JPEG ¥Õ¥¡¥¤¥ë¤ò¥ª¥ê¥¸¥Ê¥ë¤Î IJG JPEG Library ¤ÇŸ³«¤¹¤ë¤È¡¢¾åµ­¤ÎÍýͳ¤«¤é¡¢ + JPEG ¥Õ¥¡¥¤¥ë¤òŸ³«¤·¤Æ¤«¤é²èÁü½èÍý¥½¥Õ¥È¤Ç²óž¤µ¤»¤¿²èÁü¤ËÈæ¤Ù¤Æ²è¼Á¤¬ + Îô¤Ã¤Æ¤·¤Þ¤¤¤Þ¤¹¡£¤³¤Î¹àÌܤòÍ­¸ú¤Ë¤¹¤ë¤³¤È¤Ç¡¢²èÁü½èÍý¥½¥Õ¥È¤Ç²óž¤µ¤»¤¿ + ²èÁü¤È¤Û¤ÜƱ¤¸¥¯¥ª¥ê¥Æ¥£¤Ç²èÁü¤òŸ³«¤¹¤ë¤³¤È¤¬²Äǽ¤Ë¤Ê¤ê¤Þ¤¹¡£ + + ¤³¤Î¹àÌܤϡ¢ÆÃ¤ËÍýͳ¤Î¤Ê¤¤¸Â¤ê¡¢#define ¤Î¾õÂ֤ˤ·¤Æ¤ª¤¯¤³¤È¤ò¤ªÁ¦¤á + ¤¤¤¿¤·¤Þ¤¹¡£¥ª¥ê¥¸¥Ê¥ë¤Î IJG JPEG Library ¤È´°Á´¤ËƱ°ì¤Î·ë²Ì¤¬É¬Í×¤Ê + ¾ì¹ç¤Î¤ß 
#undef ¤Ë¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + + + +[EOF] diff --git a/jconfig.bcc b/unused/jconfig.bcc similarity index 100% rename from jconfig.bcc rename to unused/jconfig.bcc diff --git a/jconfig.mac b/unused/jconfig.mac similarity index 100% rename from jconfig.mac rename to unused/jconfig.mac diff --git a/jconfig.manx b/unused/jconfig.manx similarity index 100% rename from jconfig.manx rename to unused/jconfig.manx diff --git a/jconfig.mc6 b/unused/jconfig.mc6 similarity index 100% rename from jconfig.mc6 rename to unused/jconfig.mc6 diff --git a/jconfig.sas b/unused/jconfig.sas similarity index 100% rename from jconfig.sas rename to unused/jconfig.sas diff --git a/jconfig.st b/unused/jconfig.st similarity index 100% rename from jconfig.st rename to unused/jconfig.st diff --git a/jconfig.vms b/unused/jconfig.vms similarity index 100% rename from jconfig.vms rename to unused/jconfig.vms diff --git a/jconfig.wat b/unused/jconfig.wat similarity index 100% rename from jconfig.wat rename to unused/jconfig.wat diff --git a/jfdctflt.c b/unused/jfdctflt.c similarity index 100% rename from jfdctflt.c rename to unused/jfdctflt.c diff --git a/jfdctfst.c b/unused/jfdctfst.c similarity index 100% rename from jfdctfst.c rename to unused/jfdctfst.c diff --git a/jfdctint.c b/unused/jfdctint.c similarity index 100% rename from jfdctint.c rename to unused/jfdctint.c diff --git a/jidctflt.c b/unused/jidctflt.c similarity index 100% rename from jidctflt.c rename to unused/jidctflt.c diff --git a/jidctfst.c b/unused/jidctfst.c similarity index 100% rename from jidctfst.c rename to unused/jidctfst.c diff --git a/jidctint.c b/unused/jidctint.c similarity index 100% rename from jidctint.c rename to unused/jidctint.c diff --git a/jidctred.c b/unused/jidctred.c similarity index 100% rename from jidctred.c rename to unused/jidctred.c diff --git a/jmemdos.c b/unused/jmemdos.c similarity index 100% rename from jmemdos.c rename to unused/jmemdos.c diff --git a/jmemdosa.asm b/unused/jmemdosa.asm 
similarity index 100% rename from jmemdosa.asm rename to unused/jmemdosa.asm diff --git a/jmemmac.c b/unused/jmemmac.c similarity index 100% rename from jmemmac.c rename to unused/jmemmac.c diff --git a/makcjpeg.st b/unused/makcjpeg.st similarity index 100% rename from makcjpeg.st rename to unused/makcjpeg.st diff --git a/makdjpeg.st b/unused/makdjpeg.st similarity index 100% rename from makdjpeg.st rename to unused/makdjpeg.st diff --git a/makeapps.ds b/unused/makeapps.ds similarity index 100% rename from makeapps.ds rename to unused/makeapps.ds diff --git a/makefile.bcc b/unused/makefile.bcc similarity index 100% rename from makefile.bcc rename to unused/makefile.bcc diff --git a/makefile.manx b/unused/makefile.manx similarity index 100% rename from makefile.manx rename to unused/makefile.manx diff --git a/makefile.mc6 b/unused/makefile.mc6 similarity index 100% rename from makefile.mc6 rename to unused/makefile.mc6 diff --git a/makefile.mms b/unused/makefile.mms similarity index 100% rename from makefile.mms rename to unused/makefile.mms diff --git a/makefile.sas b/unused/makefile.sas similarity index 100% rename from makefile.sas rename to unused/makefile.sas diff --git a/makefile.vms b/unused/makefile.vms similarity index 100% rename from makefile.vms rename to unused/makefile.vms diff --git a/makefile.wat b/unused/makefile.wat similarity index 100% rename from makefile.wat rename to unused/makefile.wat diff --git a/makelib.ds b/unused/makelib.ds similarity index 100% rename from makelib.ds rename to unused/makelib.ds diff --git a/makeproj.mac b/unused/makeproj.mac similarity index 100% rename from makeproj.mac rename to unused/makeproj.mac diff --git a/makljpeg.st b/unused/makljpeg.st similarity index 100% rename from makljpeg.st rename to unused/makljpeg.st diff --git a/maktjpeg.st b/unused/maktjpeg.st similarity index 100% rename from maktjpeg.st rename to unused/maktjpeg.st diff --git a/makvms.opt b/unused/makvms.opt similarity index 100% rename from 
makvms.opt rename to unused/makvms.opt diff --git a/unused/rdgif.c b/unused/rdgif.c new file mode 100644 index 0000000..b27c167 --- /dev/null +++ b/unused/rdgif.c @@ -0,0 +1,38 @@ +/* + * rdgif.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains routines to read input images in GIF format. + * + ***************************************************************************** + * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * + * the ability to read GIF files has been removed from the IJG distribution. * + * Sorry about that. * + ***************************************************************************** + * + * We are required to state that + * "The Graphics Interchange Format(c) is the Copyright property of + * CompuServe Incorporated. GIF(sm) is a Service Mark property of + * CompuServe Incorporated." + */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef GIF_SUPPORTED + +/* + * The module selection routine for GIF format input. + */ + +GLOBAL(cjpeg_source_ptr) +jinit_read_gif (j_compress_ptr cinfo) +{ + fprintf(stderr, "GIF input is unsupported for legal reasons. Sorry.\n"); + exit(EXIT_FAILURE); + return NULL; /* keep compiler happy */ +} + +#endif /* GIF_SUPPORTED */ diff --git a/unused/wrgif.c b/unused/wrgif.c new file mode 100644 index 0000000..5fe8328 --- /dev/null +++ b/unused/wrgif.c @@ -0,0 +1,399 @@ +/* + * wrgif.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains routines to write output images in GIF format. 
+ * + ************************************************************************** + * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * + * this code has been modified to output "uncompressed GIF" files. * + * There is no trace of the LZW algorithm in this file. * + ************************************************************************** + * + * These routines may need modification for non-Unix environments or + * specialized applications. As they stand, they assume output to + * an ordinary stdio stream. + */ + +/* + * This code is loosely based on ppmtogif from the PBMPLUS distribution + * of Feb. 1991. That file contains the following copyright notice: + * Based on GIFENCODE by David Rowley . + * Lempel-Ziv compression based on "compress" by Spencer W. Thomas et al. + * Copyright (C) 1989 by Jef Poskanzer. + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, provided + * that the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation. This software is provided "as is" without express or + * implied warranty. + * + * We are also required to state that + * "The Graphics Interchange Format(c) is the Copyright property of + * CompuServe Incorporated. GIF(sm) is a Service Mark property of + * CompuServe Incorporated." 
+ */ + +#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ + +#ifdef GIF_SUPPORTED + + +/* Private version of data destination object */ + +typedef struct { + struct djpeg_dest_struct pub; /* public fields */ + + j_decompress_ptr cinfo; /* back link saves passing separate parm */ + + /* State for packing variable-width codes into a bitstream */ + int n_bits; /* current number of bits/code */ + int maxcode; /* maximum code, given n_bits */ + INT32 cur_accum; /* holds bits not yet output */ + int cur_bits; /* # of bits in cur_accum */ + + /* State for GIF code assignment */ + int ClearCode; /* clear code (doesn't change) */ + int EOFCode; /* EOF code (ditto) */ + int code_counter; /* counts output symbols */ + + /* GIF data packet construction buffer */ + int bytesinpkt; /* # of bytes in current packet */ + char packetbuf[256]; /* workspace for accumulating packet */ + +} gif_dest_struct; + +typedef gif_dest_struct * gif_dest_ptr; + +/* Largest value that will fit in N bits */ +#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) + + +/* + * Routines to package finished data bytes into GIF data blocks. + * A data block consists of a count byte (1..255) and that many data bytes. 
+ */ + +LOCAL(void) +flush_packet (gif_dest_ptr dinfo) +/* flush any accumulated data */ +{ + if (dinfo->bytesinpkt > 0) { /* never write zero-length packet */ + dinfo->packetbuf[0] = (char) dinfo->bytesinpkt++; + if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt) + != (size_t) dinfo->bytesinpkt) + ERREXIT(dinfo->cinfo, JERR_FILE_WRITE); + dinfo->bytesinpkt = 0; + } +} + + +/* Add a character to current packet; flush to disk if necessary */ +#define CHAR_OUT(dinfo,c) \ + { (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c); \ + if ((dinfo)->bytesinpkt >= 255) \ + flush_packet(dinfo); \ + } + + +/* Routine to convert variable-width codes into a byte stream */ + +LOCAL(void) +output (gif_dest_ptr dinfo, int code) +/* Emit a code of n_bits bits */ +/* Uses cur_accum and cur_bits to reblock into 8-bit bytes */ +{ + dinfo->cur_accum |= ((INT32) code) << dinfo->cur_bits; + dinfo->cur_bits += dinfo->n_bits; + + while (dinfo->cur_bits >= 8) { + CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF); + dinfo->cur_accum >>= 8; + dinfo->cur_bits -= 8; + } +} + + +/* The pseudo-compression algorithm. + * + * In this module we simply output each pixel value as a separate symbol; + * thus, no compression occurs. In fact, there is expansion of one bit per + * pixel, because we use a symbol width one bit wider than the pixel width. + * + * GIF ordinarily uses variable-width symbols, and the decoder will expect + * to ratchet up the symbol width after a fixed number of symbols. + * To simplify the logic and keep the expansion penalty down, we emit a + * GIF Clear code to reset the decoder just before the width would ratchet up. + * Thus, all the symbols in the output file will have the same bit width. + * Note that emitting the Clear codes at the right times is a mere matter of + * counting output symbols and is in no way dependent on the LZW patent. 
+ * + * With a small basic pixel width (low color count), Clear codes will be + * needed very frequently, causing the file to expand even more. So this + * simplistic approach wouldn't work too well on bilevel images, for example. + * But for output of JPEG conversions the pixel width will usually be 8 bits + * (129 to 256 colors), so the overhead added by Clear symbols is only about + * one symbol in every 256. + */ + +LOCAL(void) +compress_init (gif_dest_ptr dinfo, int i_bits) +/* Initialize pseudo-compressor */ +{ + /* init all the state variables */ + dinfo->n_bits = i_bits; + dinfo->maxcode = MAXCODE(dinfo->n_bits); + dinfo->ClearCode = (1 << (i_bits - 1)); + dinfo->EOFCode = dinfo->ClearCode + 1; + dinfo->code_counter = dinfo->ClearCode + 2; + /* init output buffering vars */ + dinfo->bytesinpkt = 0; + dinfo->cur_accum = 0; + dinfo->cur_bits = 0; + /* GIF specifies an initial Clear code */ + output(dinfo, dinfo->ClearCode); +} + + +LOCAL(void) +compress_pixel (gif_dest_ptr dinfo, int c) +/* Accept and "compress" one pixel value. + * The given value must be less than n_bits wide. + */ +{ + /* Output the given pixel value as a symbol. */ + output(dinfo, c); + /* Issue Clear codes often enough to keep the reader from ratcheting up + * its symbol size. 
+ */ + if (dinfo->code_counter < dinfo->maxcode) { + dinfo->code_counter++; + } else { + output(dinfo, dinfo->ClearCode); + dinfo->code_counter = dinfo->ClearCode + 2; /* reset the counter */ + } +} + + +LOCAL(void) +compress_term (gif_dest_ptr dinfo) +/* Clean up at end */ +{ + /* Send an EOF code */ + output(dinfo, dinfo->EOFCode); + /* Flush the bit-packing buffer */ + if (dinfo->cur_bits > 0) { + CHAR_OUT(dinfo, dinfo->cur_accum & 0xFF); + } + /* Flush the packet buffer */ + flush_packet(dinfo); +} + + +/* GIF header construction */ + + +LOCAL(void) +put_word (gif_dest_ptr dinfo, unsigned int w) +/* Emit a 16-bit word, LSB first */ +{ + putc(w & 0xFF, dinfo->pub.output_file); + putc((w >> 8) & 0xFF, dinfo->pub.output_file); +} + + +LOCAL(void) +put_3bytes (gif_dest_ptr dinfo, int val) +/* Emit 3 copies of same byte value --- handy subr for colormap construction */ +{ + putc(val, dinfo->pub.output_file); + putc(val, dinfo->pub.output_file); + putc(val, dinfo->pub.output_file); +} + + +LOCAL(void) +emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) +/* Output the GIF file header, including color map */ +/* If colormap==NULL, synthesize a gray-scale colormap */ +{ + int BitsPerPixel, ColorMapSize, InitCodeSize, FlagByte; + int cshift = dinfo->cinfo->data_precision - 8; + int i; + + if (num_colors > 256) + ERREXIT1(dinfo->cinfo, JERR_TOO_MANY_COLORS, num_colors); + /* Compute bits/pixel and related values */ + BitsPerPixel = 1; + while (num_colors > (1 << BitsPerPixel)) + BitsPerPixel++; + ColorMapSize = 1 << BitsPerPixel; + if (BitsPerPixel <= 1) + InitCodeSize = 2; + else + InitCodeSize = BitsPerPixel; + /* + * Write the GIF header. + * Note that we generate a plain GIF87 header for maximum compatibility. 
+ */ + putc('G', dinfo->pub.output_file); + putc('I', dinfo->pub.output_file); + putc('F', dinfo->pub.output_file); + putc('8', dinfo->pub.output_file); + putc('7', dinfo->pub.output_file); + putc('a', dinfo->pub.output_file); + /* Write the Logical Screen Descriptor */ + put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); + put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + FlagByte = 0x80; /* Yes, there is a global color table */ + FlagByte |= (BitsPerPixel-1) << 4; /* color resolution */ + FlagByte |= (BitsPerPixel-1); /* size of global color table */ + putc(FlagByte, dinfo->pub.output_file); + putc(0, dinfo->pub.output_file); /* Background color index */ + putc(0, dinfo->pub.output_file); /* Reserved (aspect ratio in GIF89) */ + /* Write the Global Color Map */ + /* If the color map is more than 8 bits precision, */ + /* we reduce it to 8 bits by shifting */ + for (i=0; i < ColorMapSize; i++) { + if (i < num_colors) { + if (colormap != NULL) { + if (dinfo->cinfo->out_color_space == JCS_RGB) { + /* Normal case: RGB color map */ + putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file); + putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file); + putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file); + } else { + /* Grayscale "color map": possible if quantizing grayscale image */ + put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift); + } + } else { + /* Create a gray-scale map of num_colors values, range 0..255 */ + put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1)); + } + } else { + /* fill out the map to a power of 2 */ + put_3bytes(dinfo, 0); + } + } + /* Write image separator and Image Descriptor */ + putc(',', dinfo->pub.output_file); /* separator */ + put_word(dinfo, 0); /* left/top offset */ + put_word(dinfo, 0); + put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); /* image size */ + put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + /* flag byte: not interlaced, 
no local color map */ + putc(0x00, dinfo->pub.output_file); + /* Write Initial Code Size byte */ + putc(InitCodeSize, dinfo->pub.output_file); + + /* Initialize for "compression" of image data */ + compress_init(dinfo, InitCodeSize+1); +} + + +/* + * Startup: write the file header. + */ + +METHODDEF(void) +start_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + + if (cinfo->quantize_colors) + emit_header(dest, cinfo->actual_number_of_colors, cinfo->colormap); + else + emit_header(dest, 256, (JSAMPARRAY) NULL); +} + + +/* + * Write some pixel data. + * In this module rows_supplied will always be 1. + */ + +METHODDEF(void) +put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + for (col = cinfo->output_width; col > 0; col--) { + compress_pixel(dest, GETJSAMPLE(*ptr++)); + } +} + + +/* + * Finish up at the end of the file. + */ + +METHODDEF(void) +finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + gif_dest_ptr dest = (gif_dest_ptr) dinfo; + + /* Flush "compression" mechanism */ + compress_term(dest); + /* Write a zero-length data block to end the series */ + putc(0, dest->pub.output_file); + /* Write the GIF terminator mark */ + putc(';', dest->pub.output_file); + /* Make sure we wrote the output file OK */ + fflush(dest->pub.output_file); + if (ferror(dest->pub.output_file)) + ERREXIT(cinfo, JERR_FILE_WRITE); +} + + +/* + * The module selection routine for GIF format output. 
+ */ + +GLOBAL(djpeg_dest_ptr) +jinit_write_gif (j_decompress_ptr cinfo) +{ + gif_dest_ptr dest; + + /* Create module interface object, fill in method pointers */ + dest = (gif_dest_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(gif_dest_struct)); + dest->cinfo = cinfo; /* make back link for subroutines */ + dest->pub.start_output = start_output_gif; + dest->pub.put_pixel_rows = put_pixel_rows; + dest->pub.finish_output = finish_output_gif; + + if (cinfo->out_color_space != JCS_GRAYSCALE && + cinfo->out_color_space != JCS_RGB) + ERREXIT(cinfo, JERR_GIF_COLORSPACE); + + /* Force quantization if color or if > 8 bits input */ + if (cinfo->out_color_space != JCS_GRAYSCALE || cinfo->data_precision > 8) { + /* Force quantization to at most 256 colors */ + cinfo->quantize_colors = TRUE; + if (cinfo->desired_number_of_colors > 256) + cinfo->desired_number_of_colors = 256; + } + + /* Calculate output image dimensions so we can allocate space */ + jpeg_calc_output_dimensions(cinfo); + + if (cinfo->output_components != 1) /* safety check: just one component? */ + ERREXIT(cinfo, JERR_GIF_BUG); + + /* Create decompressor output buffer. 
*/ + dest->pub.buffer = (*cinfo->mem->alloc_sarray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION) 1); + dest->pub.buffer_height = 1; + + return (djpeg_dest_ptr) dest; +} + +#endif /* GIF_SUPPORTED */ diff --git a/vc6proj/apptest.dsp b/vc6proj/apptest.dsp new file mode 100644 index 0000000..0f5c35b --- /dev/null +++ b/vc6proj/apptest.dsp @@ -0,0 +1,242 @@ +# Microsoft Developer Studio Project File - Name="apptest" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Generic Project" 0x010a + +CFG=apptest - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ ¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f "apptest.mak". +!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "apptest.mak" CFG="apptest - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "apptest - Win32 Release" ("Win32 (x86) Generic Project" ÍÑ) +!MESSAGE "apptest - Win32 Debug" ("Win32 (x86) Generic Project" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +MTL=midl.exe + +!IF "$(CFG)" == "apptest - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Target_Dir "" +# Begin Special Build Tool +OutDir=.\Release +SOURCE="$(InputPath)" +PostBuild_Cmds=fc /b .\testimg.ppm $(OutDir)\testout.ppm fc /b .\testimg.bmp $(OutDir)\testout.bmp fc /b .\testimg.jpg $(OutDir)\testout.jpg fc /b .\testimg.ppm $(OutDir)\testoutp.ppm fc /b 
.\testimgp.jpg $(OutDir)\testoutp.jpg fc /b .\testorig.jpg $(OutDir)\testoutt.jpg +# End Special Build Tool + +!ELSEIF "$(CFG)" == "apptest - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Target_Dir "" +# Begin Special Build Tool +OutDir=.\Debug +SOURCE="$(InputPath)" +PostBuild_Cmds=fc /b .\testimg.ppm $(OutDir)\testout.ppm fc /b .\testimg.bmp $(OutDir)\testout.bmp fc /b .\testimg.jpg $(OutDir)\testout.jpg fc /b .\testimg.ppm $(OutDir)\testoutp.ppm fc /b .\testimgp.jpg $(OutDir)\testoutp.jpg fc /b .\testorig.jpg $(OutDir)\testoutt.jpg +# End Special Build Tool + +!ENDIF + +# Begin Target + +# Name "apptest - Win32 Release" +# Name "apptest - Win32 Debug" +# Begin Group "Test Image Files" + +# PROP Default_Filter "*.jpg;*.bmp;*.ppm" +# Begin Source File + +SOURCE=.\testimg.bmp +# End Source File +# Begin Source File + +SOURCE=.\testimg.jpg +# End Source File +# Begin Source File + +SOURCE=.\testimg.ppm + +!IF "$(CFG)" == "apptest - Win32 Release" + +# PROP Ignore_Default_Tool 1 +# Begin Custom Build +InputDir=. +OutDir=.\Release +InputPath=.\testimg.ppm + +BuildCmds= \ + echo $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \ + $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \ + echo $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \ + $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \ + + +"$(OutDir)\testout.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) + +"$(OutDir)\testoutp.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) +# End Custom Build + +!ELSEIF "$(CFG)" == "apptest - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +# Begin Custom Build - Testing - $(InputPath) +InputDir=. 
+OutDir=.\Debug +InputPath=.\testimg.ppm + +BuildCmds= \ + echo $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \ + $(OutDir)\cjpeg -dct int -outfile $(OutDir)\testout.jpg .\testimg.ppm \ + echo $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \ + $(OutDir)\cjpeg -dct int -progressive -opt -outfile $(OutDir)\testoutp.jpg .\testimg.ppm \ + + +"$(OutDir)\testout.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) + +"$(OutDir)\testoutp.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\testimgp.jpg +# End Source File +# Begin Source File + +SOURCE=.\testorig.jpg + +!IF "$(CFG)" == "apptest - Win32 Release" + +# PROP Ignore_Default_Tool 1 +# Begin Custom Build +InputDir=. +OutDir=.\Release +InputPath=.\testorig.jpg + +BuildCmds= \ + echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \ + $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \ + echo $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \ + $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \ + + +"$(OutDir)\testout.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) + +"$(OutDir)\testout.bmp" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) +# End Custom Build + +!ELSEIF "$(CFG)" == "apptest - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +# Begin Custom Build - Testing - $(InputPath) +InputDir=. 
+OutDir=.\Debug +InputPath=.\testorig.jpg + +BuildCmds= \ + echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \ + $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testout.ppm .\testorig.jpg \ + echo $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \ + $(OutDir)\djpeg -dct int -bmp -colors 256 -outfile $(OutDir)\testout.bmp .\testorig.jpg \ + + +"$(OutDir)\testout.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) + +"$(OutDir)\testout.bmp" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\testprog.jpg + +!IF "$(CFG)" == "apptest - Win32 Release" + +# PROP Ignore_Default_Tool 1 +# Begin Custom Build +InputDir=. +OutDir=.\Release +InputPath=.\testprog.jpg + +BuildCmds= \ + echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \ + $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \ + echo $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \ + $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \ + + +"$(OutDir)\testoutp.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) + +"$(OutDir)\testoutt.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) +# End Custom Build + +!ELSEIF "$(CFG)" == "apptest - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +# Begin Custom Build - Testing - $(InputPath) +InputDir=. 
+OutDir=.\Debug +InputPath=.\testprog.jpg + +BuildCmds= \ + echo $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \ + $(OutDir)\djpeg -dct int -ppm -outfile $(OutDir)\testoutp.ppm .\testprog.jpg \ + echo $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \ + $(OutDir)\jpegtran -outfile $(OutDir)\testoutt.jpg .\testprog.jpg \ + + +"$(OutDir)\testoutp.ppm" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) + +"$(OutDir)\testoutt.jpg" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(BuildCmds) +# End Custom Build + +!ENDIF + +# End Source File +# End Group +# End Target +# End Project diff --git a/vc6proj/cjpeg.dsp b/vc6proj/cjpeg.dsp new file mode 100644 index 0000000..573e619 --- /dev/null +++ b/vc6proj/cjpeg.dsp @@ -0,0 +1,164 @@ +# Microsoft Developer Studio Project File - Name="cjpeg" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=cjpeg - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ ¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f "cjpeg.mak". 
+!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "cjpeg.mak" CFG="cjpeg - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "cjpeg - Win32 Release" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE "cjpeg - Win32 Debug" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "cjpeg - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD BASE RSC /l 0x411 /d "NDEBUG" +# ADD RSC /l 0x411 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /machine:I386 /libpath:"Release" /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ELSEIF "$(CFG)" == "cjpeg - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# 
PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD BASE RSC /l 0x411 /d "_DEBUG" +# ADD RSC /l 0x411 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"Debug" /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ENDIF + +# Begin Target + +# Name "cjpeg - Win32 Release" +# Name "cjpeg - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\cdjpeg.c +# End Source File +# Begin Source File + +SOURCE=.\cjpeg.c +# End Source File +# Begin Source File + +SOURCE=.\rdbmp.c +# End Source File +# Begin Source File + +SOURCE=.\rdgif.c +# End Source File +# Begin Source File + +SOURCE=.\rdppm.c +# End Source File +# Begin Source File + +SOURCE=.\rdrle.c +# End Source File +# Begin Source File + +SOURCE=.\rdswitch.c +# End Source File +# Begin Source File + +SOURCE=.\rdtarga.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\cderror.h +# End Source File +# Begin Source File + +SOURCE=.\cdjpeg.h +# End Source File +# Begin Source File + +SOURCE=.\jconfig.h +# End Source File +# Begin Source File + +SOURCE=.\jerror.h +# End Source File +# Begin Source File + +SOURCE=.\jinclude.h +# End Source File +# Begin Source File + 
+SOURCE=.\jmorecfg.h +# End Source File +# Begin Source File + +SOURCE=.\jpeglib.h +# End Source File +# Begin Source File + +SOURCE=.\jversion.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/vc6proj/djpeg.dsp b/vc6proj/djpeg.dsp new file mode 100644 index 0000000..156b378 --- /dev/null +++ b/vc6proj/djpeg.dsp @@ -0,0 +1,164 @@ +# Microsoft Developer Studio Project File - Name="djpeg" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=djpeg - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ ¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f "djpeg.mak". +!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "djpeg.mak" CFG="djpeg - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "djpeg - Win32 Release" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE "djpeg - Win32 Debug" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "djpeg - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" 
/D "_CONSOLE" /YX /FD /c +# ADD BASE RSC /l 0x411 /d "NDEBUG" +# ADD RSC /l 0x411 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /machine:I386 /libpath:"Release" /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ELSEIF "$(CFG)" == "djpeg - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD BASE RSC /l 0x411 /d "_DEBUG" +# ADD RSC /l 0x411 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"Debug" /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ENDIF + +# Begin Target + +# Name "djpeg - Win32 Release" +# Name "djpeg - Win32 Debug" +# Begin Group "Source Files" 
+ +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\cdjpeg.c +# End Source File +# Begin Source File + +SOURCE=.\djpeg.c +# End Source File +# Begin Source File + +SOURCE=.\rdcolmap.c +# End Source File +# Begin Source File + +SOURCE=.\wrbmp.c +# End Source File +# Begin Source File + +SOURCE=.\wrgif.c +# End Source File +# Begin Source File + +SOURCE=.\wrppm.c +# End Source File +# Begin Source File + +SOURCE=.\wrrle.c +# End Source File +# Begin Source File + +SOURCE=.\wrtarga.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\cderror.h +# End Source File +# Begin Source File + +SOURCE=.\cdjpeg.h +# End Source File +# Begin Source File + +SOURCE=.\jconfig.h +# End Source File +# Begin Source File + +SOURCE=.\jerror.h +# End Source File +# Begin Source File + +SOURCE=.\jinclude.h +# End Source File +# Begin Source File + +SOURCE=.\jmorecfg.h +# End Source File +# Begin Source File + +SOURCE=.\jpeglib.h +# End Source File +# Begin Source File + +SOURCE=.\jversion.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/vc6proj/jconfig.h b/vc6proj/jconfig.h new file mode 100644 index 0000000..d5bc9f9 --- /dev/null +++ b/vc6proj/jconfig.h @@ -0,0 +1,48 @@ +/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. 
*/ +/* see jconfig.doc for explanations */ + +#define HAVE_PROTOTYPES +#define HAVE_UNSIGNED_CHAR +#define HAVE_UNSIGNED_SHORT +/* #define void char */ +/* #define const */ +#undef CHAR_IS_UNSIGNED +#define HAVE_STDDEF_H +#define HAVE_STDLIB_H +#undef NEED_BSD_STRINGS +#undef NEED_SYS_TYPES_H +#undef NEED_FAR_POINTERS /* we presume a 32-bit flat memory model */ +#undef NEED_SHORT_EXTERNAL_NAMES +#undef INCOMPLETE_TYPES_BROKEN + +/* Define "boolean" as unsigned char, not int, per Windows custom */ +#define TYPEDEF_UCHAR_BOOLEAN + +#ifdef JPEG_INTERNALS + +#undef RIGHT_SHIFT_IS_UNSIGNED + +#endif /* JPEG_INTERNALS */ + +#if defined(JPEG_INTERNALS) || defined(JPEG_INTERNAL_OPTIONS) +#undef JSIMD_MMX_NOT_SUPPORTED +#undef JSIMD_3DNOW_NOT_SUPPORTED +#undef JSIMD_SSE_NOT_SUPPORTED +#undef JSIMD_SSE2_NOT_SUPPORTED +#endif + +#ifdef JPEG_CJPEG_DJPEG + +#define BMP_SUPPORTED /* BMP image file format */ +#define GIF_SUPPORTED /* GIF image file format */ +#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */ +#undef RLE_SUPPORTED /* Utah RLE image file format */ +#define TARGA_SUPPORTED /* Targa image file format */ + +#define TWO_FILE_COMMANDLINE /* optional */ +#define USE_SETMODE /* Microsoft has setmode() */ +#undef NEED_SIGNAL_CATCHER +#undef DONT_USE_B_MODE +#undef PROGRESS_REPORT /* optional */ + +#endif /* JPEG_CJPEG_DJPEG */ diff --git a/vc6proj/jpegtran.dsp b/vc6proj/jpegtran.dsp new file mode 100644 index 0000000..8dc38d4 --- /dev/null +++ b/vc6proj/jpegtran.dsp @@ -0,0 +1,156 @@ +# Microsoft Developer Studio Project File - Name="jpegtran" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=jpegtran - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ ¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f 
"jpegtran.mak". +!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "jpegtran.mak" CFG="jpegtran - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "jpegtran - Win32 Release" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE "jpegtran - Win32 Debug" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "jpegtran - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD BASE RSC /l 0x411 /d "NDEBUG" +# ADD RSC /l 0x411 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /machine:I386 /libpath:"Release" /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ELSEIF "$(CFG)" == "jpegtran - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" 
+# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD BASE RSC /l 0x411 /d "_DEBUG" +# ADD RSC /l 0x411 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 libjpeg.lib kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"Debug" /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ENDIF + +# Begin Target + +# Name "jpegtran - Win32 Release" +# Name "jpegtran - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\cdjpeg.c +# End Source File +# Begin Source File + +SOURCE=.\jpegtran.c +# End Source File +# Begin Source File + +SOURCE=.\rdswitch.c +# End Source File +# Begin Source File + +SOURCE=.\transupp.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\cderror.h +# End Source File +# Begin Source File + +SOURCE=.\cdjpeg.h +# End Source File +# Begin Source File + +SOURCE=.\jconfig.h +# End Source File +# Begin Source File + +SOURCE=.\jerror.h +# End Source File +# Begin Source File + +SOURCE=.\jinclude.h +# End Source File +# Begin Source File + +SOURCE=.\jmorecfg.h +# End Source File +# Begin Source File + +SOURCE=.\jpegint.h +# End Source File +# Begin Source File + +SOURCE=.\jpeglib.h +# End Source File +# Begin Source File + 
+SOURCE=.\jversion.h +# End Source File +# Begin Source File + +SOURCE=.\transupp.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/vc6proj/libjpeg.dsp b/vc6proj/libjpeg.dsp new file mode 100644 index 0000000..59647d0 --- /dev/null +++ b/vc6proj/libjpeg.dsp @@ -0,0 +1,1751 @@ +# Microsoft Developer Studio Project File - Name="libjpeg" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Static Library" 0x0104 + +CFG=libjpeg - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ ¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f "libjpeg.mak". +!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "libjpeg.mak" CFG="libjpeg - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "libjpeg - Win32 Release" ("Win32 (x86) Static Library" ÍÑ) +!MESSAGE "libjpeg - Win32 Debug" ("Win32 (x86) Static Library" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_LIB" /YX /FD /c +# ADD CPP /nologo /W3 /O2 /D "WIN32" /D "NDEBUG" /D "_LIB" /YX /Zl /FD /GF /c +# ADD BASE RSC /l 0x411 /d "NDEBUG" +# ADD RSC /l 0x411 /d 
"NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LIB32=link.exe -lib +# ADD BASE LIB32 /nologo +# ADD LIB32 /nologo + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_LIB" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_LIB" /YX /Zl /FD /GZ /c +# ADD BASE RSC /l 0x411 /d "_DEBUG" +# ADD RSC /l 0x411 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LIB32=link.exe -lib +# ADD BASE LIB32 /nologo +# ADD LIB32 /nologo + +!ENDIF + +# Begin Target + +# Name "libjpeg - Win32 Release" +# Name "libjpeg - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\jcapimin.c +# End Source File +# Begin Source File + +SOURCE=.\jcapistd.c +# End Source File +# Begin Source File + +SOURCE=.\jccoefct.c +# End Source File +# Begin Source File + +SOURCE=.\jccolor.c +# End Source File +# Begin Source File + +SOURCE=.\jcdctmgr.c +# End Source File +# Begin Source File + +SOURCE=.\jchuff.c +# End Source File +# Begin Source File + +SOURCE=.\jcinit.c +# End Source File +# Begin Source File + +SOURCE=.\jcmainct.c +# End Source File +# Begin Source File + +SOURCE=.\jcmarker.c +# End Source File +# Begin Source File + +SOURCE=.\jcmaster.c +# End Source File +# Begin Source File + +SOURCE=.\jcomapi.c +# End Source File +# Begin Source File + +SOURCE=.\jcparam.c +# End Source File +# Begin Source File + +SOURCE=.\jcphuff.c +# End Source File +# Begin Source File + +SOURCE=.\jcprepct.c +# End Source File +# Begin Source File + +SOURCE=.\jcsample.c +# End Source File +# Begin Source 
File + +SOURCE=.\jctrans.c +# End Source File +# Begin Source File + +SOURCE=.\jdapimin.c +# End Source File +# Begin Source File + +SOURCE=.\jdapistd.c +# End Source File +# Begin Source File + +SOURCE=.\jdatadst.c +# End Source File +# Begin Source File + +SOURCE=.\jdatasrc.c +# End Source File +# Begin Source File + +SOURCE=.\jdcoefct.c +# End Source File +# Begin Source File + +SOURCE=.\jdcolor.c +# End Source File +# Begin Source File + +SOURCE=.\jddctmgr.c +# End Source File +# Begin Source File + +SOURCE=.\jdhuff.c +# End Source File +# Begin Source File + +SOURCE=.\jdinput.c +# End Source File +# Begin Source File + +SOURCE=.\jdmainct.c +# End Source File +# Begin Source File + +SOURCE=.\jdmarker.c +# End Source File +# Begin Source File + +SOURCE=.\jdmaster.c +# End Source File +# Begin Source File + +SOURCE=.\jdmerge.c +# End Source File +# Begin Source File + +SOURCE=.\jdphuff.c +# End Source File +# Begin Source File + +SOURCE=.\jdpostct.c +# End Source File +# Begin Source File + +SOURCE=.\jdsample.c +# End Source File +# Begin Source File + +SOURCE=.\jdtrans.c +# End Source File +# Begin Source File + +SOURCE=.\jerror.c +# End Source File +# Begin Source File + +SOURCE=.\jmemmgr.c +# End Source File +# Begin Source File + +SOURCE=.\jmemnobs.c +# End Source File +# Begin Source File + +SOURCE=.\jquant1.c +# End Source File +# Begin Source File + +SOURCE=.\jquant2.c +# End Source File +# Begin Source File + +SOURCE=.\jutils.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\jchuff.h +# End Source File +# Begin Source File + +SOURCE=.\jcolsamp.h +# End Source File +# Begin Source File + +SOURCE=.\jconfig.h +# End Source File +# Begin Source File + +SOURCE=.\jdct.h +# End Source File +# Begin Source File + +SOURCE=.\jdhuff.h +# End Source File +# Begin Source File + +SOURCE=.\jerror.h +# End Source File +# Begin Source File + +SOURCE=.\jinclude.h +# End Source File 
+# Begin Source File + +SOURCE=.\jmemsys.h +# End Source File +# Begin Source File + +SOURCE=.\jmorecfg.h +# End Source File +# Begin Source File + +SOURCE=.\jpegint.h +# End Source File +# Begin Source File + +SOURCE=.\jpeglib.h +# End Source File +# Begin Source File + +SOURCE=.\jversion.h +# End Source File +# End Group +# Begin Group "NASM Source" + +# PROP Default_Filter "asm" +# Begin Source File + +SOURCE=.\jccolmmx.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCCOL="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jccolmmx.asm +InputName=jccolmmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCCOL="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jccolmmx.asm +InputName=jccolmmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jccolss2.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCCOLS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jccolss2.asm +InputName=jccolss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCCOLS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - 
Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jccolss2.asm +InputName=jccolss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcqnt3dn.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNT="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcqnt3dn.asm +InputName=jcqnt3dn + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNT="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcqnt3dn.asm +InputName=jcqnt3dn + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcqntflt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTF="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcqntflt.asm +InputName=jcqntflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTF="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcqntflt.asm +InputName=jcqntflt + 
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcqntint.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcqntint.asm +InputName=jcqntint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcqntint.asm +InputName=jcqntint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcqntmmx.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTM="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcqntmmx.asm +InputName=jcqntmmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTM="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcqntmmx.asm +InputName=jcqntmmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I 
$(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcqnts2f.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcqnts2f.asm +InputName=jcqnts2f + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcqnts2f.asm +InputName=jcqnts2f + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcqnts2i.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTS2="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcqnts2i.asm +InputName=jcqnts2i + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTS2="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcqnts2i.asm +InputName=jcqnts2i + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source 
File +# Begin Source File + +SOURCE=.\jcqntsse.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTSS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcqntsse.asm +InputName=jcqntsse + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCQNTSS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcqntsse.asm +InputName=jcqntsse + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcsammmx.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCSAM="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcsammmx.asm +InputName=jcsammmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCSAM="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcsammmx.asm +InputName=jcsammmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jcsamss2.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + 
+# PROP Ignore_Default_Tool 1 +USERDEP__JCSAMS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jcsamss2.asm +InputName=jcsamss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JCSAMS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jcsamss2.asm +InputName=jcsamss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jdcolmmx.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDCOL="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jdcolmmx.asm +InputName=jdcolmmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDCOL="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jdcolmmx.asm +InputName=jdcolmmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jdcolss2.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDCOLS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" 
"jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jdcolss2.asm +InputName=jdcolss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDCOLS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jdcolss2.asm +InputName=jdcolss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jdmermmx.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDMER="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jdmermmx.asm +InputName=jdmermmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDMER="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jdmermmx.asm +InputName=jdmermmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jdmerss2.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDMERS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release 
+InputPath=.\jdmerss2.asm +InputName=jdmerss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDMERS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jdmerss2.asm +InputName=jdmerss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jdsammmx.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDSAM="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jdsammmx.asm +InputName=jdsammmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDSAM="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jdsammmx.asm +InputName=jdsammmx + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jdsamss2.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDSAMS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jdsamss2.asm +InputName=jdsamss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) 
"$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JDSAMS="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jcolsamp.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jdsamss2.asm +InputName=jdsamss2 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jf3dnflt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JF3DN="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jf3dnflt.asm +InputName=jf3dnflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JF3DN="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jf3dnflt.asm +InputName=jf3dnflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfdctflt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFDCT="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfdctflt.asm +InputName=jfdctflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj 
$(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFDCT="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfdctflt.asm +InputName=jfdctflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfdctfst.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFDCTF="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfdctfst.asm +InputName=jfdctfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFDCTF="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfdctfst.asm +InputName=jfdctfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfdctint.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFDCTI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfdctint.asm +InputName=jfdctint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP 
Ignore_Default_Tool 1 +USERDEP__JFDCTI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfdctint.asm +InputName=jfdctint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfmmxfst.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFMMX="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfmmxfst.asm +InputName=jfmmxfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFMMX="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfmmxfst.asm +InputName=jfmmxfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfmmxint.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFMMXI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfmmxint.asm +InputName=jfmmxint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFMMXI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom 
Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfmmxint.asm +InputName=jfmmxint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfss2fst.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFSS2="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfss2fst.asm +InputName=jfss2fst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFSS2="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfss2fst.asm +InputName=jfss2fst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfss2int.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFSS2I="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfss2int.asm +InputName=jfss2int + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFSS2I="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfss2int.asm +InputName=jfss2int + 
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jfsseflt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFSSE="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jfsseflt.asm +InputName=jfsseflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JFSSE="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jfsseflt.asm +InputName=jfsseflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\ji3dnflt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JI3DN="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\ji3dnflt.asm +InputName=ji3dnflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JI3DN="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\ji3dnflt.asm +InputName=ji3dnflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ 
-o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jidctflt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCT="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jidctflt.asm +InputName=jidctflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCT="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jidctflt.asm +InputName=jidctflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jidctfst.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCTF="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jidctfst.asm +InputName=jidctfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCTF="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jidctfst.asm +InputName=jidctfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin 
Source File + +SOURCE=.\jidctint.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCTI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jidctint.asm +InputName=jidctint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCTI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jidctint.asm +InputName=jidctint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jidctred.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCTR="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jidctred.asm +InputName=jidctred + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIDCTR="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jidctred.asm +InputName=jidctred + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jimmxfst.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP 
Ignore_Default_Tool 1 +USERDEP__JIMMX="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jimmxfst.asm +InputName=jimmxfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIMMX="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jimmxfst.asm +InputName=jimmxfst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jimmxint.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIMMXI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jimmxint.asm +InputName=jimmxint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIMMXI="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jimmxint.asm +InputName=jimmxint + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jimmxred.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIMMXR="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom 
Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jimmxred.asm +InputName=jimmxred + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JIMMXR="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jimmxred.asm +InputName=jimmxred + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jiss2flt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jiss2flt.asm +InputName=jiss2flt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jiss2flt.asm +InputName=jiss2flt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jiss2fst.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2F="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jiss2fst.asm +InputName=jiss2fst + 
+"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2F="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jiss2fst.asm +InputName=jiss2fst + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jiss2int.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2I="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jiss2int.asm +InputName=jiss2int + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2I="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jiss2int.asm +InputName=jiss2int + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jiss2red.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2R="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jiss2red.asm +InputName=jiss2red + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I 
$(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISS2R="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jiss2red.asm +InputName=jiss2red + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jisseflt.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISSE="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jisseflt.asm +InputName=jisseflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JISSE="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" "jdct.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jisseflt.asm +InputName=jisseflt + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jsimdcpu.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JSIMD="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jsimdcpu.asm +InputName=jsimdcpu + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 
Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JSIMD="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jsimdcpu.asm +InputName=jsimdcpu + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\jsimdw32.asm + +!IF "$(CFG)" == "libjpeg - Win32 Release" + +# PROP Ignore_Default_Tool 1 +USERDEP__JSIMDW="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Release +InputPath=.\jsimdw32.asm +InputName=jsimdw32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ELSEIF "$(CFG)" == "libjpeg - Win32 Debug" + +# PROP Ignore_Default_Tool 1 +USERDEP__JSIMDW="$(IntDir)\jsimdcfg.inc" "jsimdext.inc" +# Begin Custom Build - Assembling - $(InputPath) +IntDir=.\Debug +InputPath=.\jsimdw32.asm +InputName=jsimdw32 + +"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + nasmw -Xvc -fwin32 -DWIN32 -I $(IntDir)\ -o $(IntDir)\$(InputName).obj $(InputPath) + +# End Custom Build + +!ENDIF + +# End Source File +# End Group +# Begin Group "NASM Header" + +# PROP Default_Filter "inc" +# Begin Source File + +SOURCE=.\jcolsamp.inc +# End Source File +# Begin Source File + +SOURCE=.\jdct.inc +# End Source File +# Begin Source File + +SOURCE=.\jsimdext.inc +# End Source File +# End Group +# End Target +# End Project diff --git a/vc6proj/libjpeg.dsw b/vc6proj/libjpeg.dsw new file mode 100644 index 0000000..4ace153 --- /dev/null +++ b/vc6proj/libjpeg.dsw @@ -0,0 +1,134 @@ +Microsoft Developer Studio Workspace File, Format Version 6.00 +# ·Ù¹ð: ¤³¤ÎŽÜްޏ޽ŽÍŽßް޽ ŽÌާ޲ŽÙ ¤òÊÔ½¸¤Þ¤¿¤Ïºï½ü¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤! 
+ +############################################################################### + +Project: "apptest"=".\apptest.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ + Begin Project Dependency + Project_Dep_Name cjpeg + End Project Dependency + Begin Project Dependency + Project_Dep_Name djpeg + End Project Dependency + Begin Project Dependency + Project_Dep_Name jpegtran + End Project Dependency +}}} + +############################################################################### + +Project: "cjpeg"=".\cjpeg.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ + Begin Project Dependency + Project_Dep_Name libjpeg + End Project Dependency +}}} + +############################################################################### + +Project: "djpeg"=".\djpeg.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ + Begin Project Dependency + Project_Dep_Name libjpeg + End Project Dependency +}}} + +############################################################################### + +Project: "jpegtran"=".\jpegtran.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ + Begin Project Dependency + Project_Dep_Name libjpeg + End Project Dependency +}}} + +############################################################################### + +Project: "libjpeg"=".\libjpeg.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ + Begin Project Dependency + Project_Dep_Name makecfg + End Project Dependency +}}} + +############################################################################### + +Project: "makecfg"=".\makecfg.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Project: "rdjpgcom"=".\rdjpgcom.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Project: 
"wrjpgcom"=".\wrjpgcom.dsp" - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Global: + +Package=<5> +{{{ +}}} + +Package=<3> +{{{ +}}} + +############################################################################### + diff --git a/vc6proj/makecfg.dsp b/vc6proj/makecfg.dsp new file mode 100644 index 0000000..dbe914a --- /dev/null +++ b/vc6proj/makecfg.dsp @@ -0,0 +1,142 @@ +# Microsoft Developer Studio Project File - Name="makecfg" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=makecfg - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ ¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f "makecfg.mak". +!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "makecfg.mak" CFG="makecfg - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "makecfg - Win32 Release" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE "makecfg - Win32 Debug" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "makecfg - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD CPP /nologo /W3 /O2 /GF 
/D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD BASE RSC /l 0x411 /d "NDEBUG" +# ADD RSC /l 0x411 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 /nologo /subsystem:console /machine:I386 /opt:nowin98 +# SUBTRACT LINK32 /pdb:none +# Begin Custom Build - Generating - $(OutDir)\jsimdcfg.inc +OutDir=.\Release +InputPath=.\Release\makecfg.exe +SOURCE="$(InputPath)" + +"$(OutDir)\jsimdcfg.inc" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(TargetPath) > $(OutDir)\jsimdcfg.inc + +# End Custom Build + +!ELSEIF "$(CFG)" == "makecfg - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD BASE RSC /l 0x411 /d "_DEBUG" +# ADD RSC /l 0x411 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 /nologo 
/subsystem:console /debug /machine:I386 /pdbtype:sept /opt:nowin98 +# SUBTRACT LINK32 /pdb:none +# Begin Custom Build - Generating - $(OutDir)\jsimdcfg.inc +OutDir=.\Debug +InputPath=.\Debug\makecfg.exe +SOURCE="$(InputPath)" + +"$(OutDir)\jsimdcfg.inc" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + $(TargetPath) > $(OutDir)\jsimdcfg.inc + +# End Custom Build + +!ENDIF + +# Begin Target + +# Name "makecfg - Win32 Release" +# Name "makecfg - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\makecfg.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\jconfig.h +# End Source File +# Begin Source File + +SOURCE=.\jerror.h +# End Source File +# Begin Source File + +SOURCE=.\jinclude.h +# End Source File +# Begin Source File + +SOURCE=.\jmorecfg.h +# End Source File +# Begin Source File + +SOURCE=.\jpegint.h +# End Source File +# Begin Source File + +SOURCE=.\jpeglib.h +# End Source File +# End Group +# End Target +# End Project diff --git a/vc6proj/rdjpgcom.dsp b/vc6proj/rdjpgcom.dsp new file mode 100644 index 0000000..7a5eda2 --- /dev/null +++ b/vc6proj/rdjpgcom.dsp @@ -0,0 +1,112 @@ +# Microsoft Developer Studio Project File - Name="rdjpgcom" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=rdjpgcom - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ ¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f "rdjpgcom.mak". 
+!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "rdjpgcom.mak" CFG="rdjpgcom - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "rdjpgcom - Win32 Release" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE "rdjpgcom - Win32 Debug" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "rdjpgcom - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD BASE RSC /l 0x411 /d "NDEBUG" +# ADD RSC /l 0x411 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 kernel32.lib /nologo /subsystem:console /machine:I386 /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ELSEIF "$(CFG)" == "rdjpgcom - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP 
Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD BASE RSC /l 0x411 /d "_DEBUG" +# ADD RSC /l 0x411 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ENDIF + +# Begin Target + +# Name "rdjpgcom - Win32 Release" +# Name "rdjpgcom - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\rdjpgcom.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\jconfig.h +# End Source File +# Begin Source File + +SOURCE=.\jinclude.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/vc6proj/wrjpgcom.dsp b/vc6proj/wrjpgcom.dsp new file mode 100644 index 0000000..7fdf9ec --- /dev/null +++ b/vc6proj/wrjpgcom.dsp @@ -0,0 +1,112 @@ +# Microsoft Developer Studio Project File - Name="wrjpgcom" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** ÊÔ½¸¤·¤Ê¤¤¤Ç¤¯¤À¤µ¤¤ ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=wrjpgcom - Win32 Debug +!MESSAGE ¤³¤ì¤ÏÍ­¸ú¤ÊŽÒ޲ޏŽÌާ޲ŽÙ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó¡£ 
¤³¤ÎŽÌŽßŽÛ޼ŽÞŽªŽ¸ŽÄ¤òŽËŽÞŽÙŽÄŽÞ¤¹¤ë¤¿¤á¤Ë¤Ï NMAKE ¤ò»ÈÍѤ·¤Æ¤¯¤À¤µ¤¤¡£ +!MESSAGE [ŽÒ޲ޏŽÌާ޲ŽÙ¤Î޴ޏ޽ŽÎŽßްŽÄ] ŽºŽÏŽÝŽÄŽÞ¤ò»ÈÍѤ·¤Æ¼Â¹Ô¤·¤Æ¤¯¤À¤µ¤¤ +!MESSAGE +!MESSAGE NMAKE /f "wrjpgcom.mak". +!MESSAGE +!MESSAGE NMAKE ¤Î¼Â¹Ô»þ¤Ë¹½À®¤ò»ØÄê¤Ç¤­¤Þ¤¹ +!MESSAGE ŽºŽÏŽÝŽÄŽÞ Ž×޲ŽÝ¾å¤ÇŽÏޏŽÛ¤ÎÀßÄê¤òÄêµÁ¤·¤Þ¤¹¡£Îã: +!MESSAGE +!MESSAGE NMAKE /f "wrjpgcom.mak" CFG="wrjpgcom - Win32 Debug" +!MESSAGE +!MESSAGE ÁªÂò²Äǽ¤ÊŽËŽÞŽÙŽÄŽÞ ŽÓްŽÄŽÞ: +!MESSAGE +!MESSAGE "wrjpgcom - Win32 Release" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE "wrjpgcom - Win32 Debug" ("Win32 (x86) Console Application" ÍÑ) +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "wrjpgcom - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD CPP /nologo /W3 /O2 /GF /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c +# ADD BASE RSC /l 0x411 /d "NDEBUG" +# ADD RSC /l 0x411 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 kernel32.lib /nologo /subsystem:console /machine:I386 /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ELSEIF "$(CFG)" == "wrjpgcom - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# 
PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /GZ /c +# ADD BASE RSC /l 0x411 /d "_DEBUG" +# ADD RSC /l 0x411 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 kernel32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /opt:nowin98 +# SUBTRACT LINK32 /pdb:none + +!ENDIF + +# Begin Target + +# Name "wrjpgcom - Win32 Release" +# Name "wrjpgcom - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\wrjpgcom.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\jconfig.h +# End Source File +# Begin Source File + +SOURCE=.\jinclude.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/wrbmp.c b/wrbmp.c index 3283b0f..517441a 100644 --- a/wrbmp.c +++ b/wrbmp.c @@ -5,6 +5,13 @@ * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. 
* + * --------------------------------------------------------------------- + * x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * This file has been modified to improve performance. + * Last Modified : October 19, 2004 + * --------------------------------------------------------------------- + * * This file contains routines to write output images in Microsoft "BMP" * format (MS Windows 3.x and OS/2 1.x flavors). * Either 8-bit colormapped or 24-bit full-color format can be written. @@ -346,9 +353,11 @@ finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; register FILE * outfile = dest->pub.output_file; JSAMPARRAY image_ptr; +#if (BITS_IN_JSAMPLE != 8) || defined(NEED_FAR_POINTERS) register JSAMPROW data_ptr; - JDIMENSION row; register JDIMENSION col; +#endif + JDIMENSION row; cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; /* Write the header and colormap */ @@ -366,11 +375,17 @@ finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) } image_ptr = (*cinfo->mem->access_virt_sarray) ((j_common_ptr) cinfo, dest->whole_image, row-1, (JDIMENSION) 1, FALSE); +#if (BITS_IN_JSAMPLE == 8) && !defined(NEED_FAR_POINTERS) + if (JFWRITE(outfile, image_ptr[0], dest->row_width) + != (size_t) dest->row_width) + ERREXIT(cinfo, JERR_FILE_WRITE); +#else data_ptr = image_ptr[0]; for (col = dest->row_width; col > 0; col--) { putc(GETJSAMPLE(*data_ptr), outfile); data_ptr++; } +#endif } if (progress != NULL) progress->completed_extra_passes++; diff --git a/wrgif.c b/wrgif.c index 5fe8328..85cfaa8 100644 --- a/wrgif.c +++ b/wrgif.c @@ -1,18 +1,17 @@ /* * wrgif.c * - * Copyright (C) 1991-1997, Thomas G. Lane. + * Copyright (C) 1991-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * - * This file contains routines to write output images in GIF format. 
- * ************************************************************************** - * NOTE: to avoid entanglements with Unisys' patent on LZW compression, * - * this code has been modified to output "uncompressed GIF" files. * - * There is no trace of the LZW algorithm in this file. * + * WARNING: You will need an LZW patent license from Unisys in order to * + * use this file legally in any commercial or shareware application. * ************************************************************************** * + * This file contains routines to write output images in GIF format. + * * These routines may need modification for non-Unix environments or * specialized applications. As they stand, they assume output to * an ordinary stdio stream. @@ -42,6 +41,40 @@ #ifdef GIF_SUPPORTED +#define MAX_LZW_BITS 12 /* maximum LZW code size (4096 symbols) */ + +typedef INT16 code_int; /* must hold -1 .. 2**MAX_LZW_BITS */ + +#define LZW_TABLE_SIZE ((code_int) 1 << MAX_LZW_BITS) + +#define HSIZE 5003 /* hash table size for 80% occupancy */ + +typedef int hash_int; /* must hold -2*HSIZE..2*HSIZE */ + +#define MAXCODE(n_bits) (((code_int) 1 << (n_bits)) - 1) + + +/* + * The LZW hash table consists of two parallel arrays: + * hash_code[i] code of symbol in slot i, or 0 if empty slot + * hash_value[i] symbol's value; undefined if empty slot + * where slot values (i) range from 0 to HSIZE-1. The symbol value is + * its prefix symbol's code concatenated with its suffix character. + * + * Algorithm: use open addressing double hashing (no chaining) on the + * prefix code / suffix character combination. We do a variant of Knuth's + * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime + * secondary probe. + * + * The hash_value[] table is allocated from FAR heap space since it would + * use up rather a lot of the near data space in a PC. 
+ */ + +typedef INT32 hash_entry; /* must hold (code_int<<8) | byte */ + +#define HASH_ENTRY(prefix,suffix) ((((hash_entry) (prefix)) << 8) | (suffix)) + + /* Private version of data destination object */ typedef struct { @@ -51,14 +84,23 @@ typedef struct { /* State for packing variable-width codes into a bitstream */ int n_bits; /* current number of bits/code */ - int maxcode; /* maximum code, given n_bits */ + code_int maxcode; /* maximum code, given n_bits */ + int init_bits; /* initial n_bits ... restored after clear */ INT32 cur_accum; /* holds bits not yet output */ int cur_bits; /* # of bits in cur_accum */ - /* State for GIF code assignment */ - int ClearCode; /* clear code (doesn't change) */ - int EOFCode; /* EOF code (ditto) */ - int code_counter; /* counts output symbols */ + /* LZW string construction */ + code_int waiting_code; /* symbol not yet output; may be extendable */ + boolean first_byte; /* if TRUE, waiting_code is not valid */ + + /* State for LZW code assignment */ + code_int ClearCode; /* clear code (doesn't change) */ + code_int EOFCode; /* EOF code (ditto) */ + code_int free_code; /* first not-yet-used symbol code */ + + /* LZW hash table */ + code_int *hash_code; /* => hash table of symbol codes */ + hash_entry FAR *hash_value; /* => hash table of symbol values */ /* GIF data packet construction buffer */ int bytesinpkt; /* # of bytes in current packet */ @@ -68,12 +110,9 @@ typedef struct { typedef gif_dest_struct * gif_dest_ptr; -/* Largest value that will fit in N bits */ -#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) - /* - * Routines to package finished data bytes into GIF data blocks. + * Routines to package compressed data bytes into GIF data blocks. * A data block consists of a count byte (1..255) and that many data bytes. 
*/ @@ -102,7 +141,7 @@ flush_packet (gif_dest_ptr dinfo) /* Routine to convert variable-width codes into a byte stream */ LOCAL(void) -output (gif_dest_ptr dinfo, int code) +output (gif_dest_ptr dinfo, code_int code) /* Emit a code of n_bits bits */ /* Uses cur_accum and cur_bits to reblock into 8-bit bytes */ { @@ -114,67 +153,123 @@ output (gif_dest_ptr dinfo, int code) dinfo->cur_accum >>= 8; dinfo->cur_bits -= 8; } + + /* + * If the next entry is going to be too big for the code size, + * then increase it, if possible. We do this here to ensure + * that it's done in sync with the decoder's codesize increases. + */ + if (dinfo->free_code > dinfo->maxcode) { + dinfo->n_bits++; + if (dinfo->n_bits == MAX_LZW_BITS) + dinfo->maxcode = LZW_TABLE_SIZE; /* free_code will never exceed this */ + else + dinfo->maxcode = MAXCODE(dinfo->n_bits); + } } -/* The pseudo-compression algorithm. - * - * In this module we simply output each pixel value as a separate symbol; - * thus, no compression occurs. In fact, there is expansion of one bit per - * pixel, because we use a symbol width one bit wider than the pixel width. - * - * GIF ordinarily uses variable-width symbols, and the decoder will expect - * to ratchet up the symbol width after a fixed number of symbols. - * To simplify the logic and keep the expansion penalty down, we emit a - * GIF Clear code to reset the decoder just before the width would ratchet up. - * Thus, all the symbols in the output file will have the same bit width. - * Note that emitting the Clear codes at the right times is a mere matter of - * counting output symbols and is in no way dependent on the LZW patent. - * - * With a small basic pixel width (low color count), Clear codes will be - * needed very frequently, causing the file to expand even more. So this - * simplistic approach wouldn't work too well on bilevel images, for example. 
- * But for output of JPEG conversions the pixel width will usually be 8 bits - * (129 to 256 colors), so the overhead added by Clear symbols is only about - * one symbol in every 256. - */ +/* The LZW algorithm proper */ + + +LOCAL(void) +clear_hash (gif_dest_ptr dinfo) +/* Fill the hash table with empty entries */ +{ + /* It's sufficient to zero hash_code[] */ + MEMZERO(dinfo->hash_code, HSIZE * SIZEOF(code_int)); +} + + +LOCAL(void) +clear_block (gif_dest_ptr dinfo) +/* Reset compressor and issue a Clear code */ +{ + clear_hash(dinfo); /* delete all the symbols */ + dinfo->free_code = dinfo->ClearCode + 2; + output(dinfo, dinfo->ClearCode); /* inform decoder */ + dinfo->n_bits = dinfo->init_bits; /* reset code size */ + dinfo->maxcode = MAXCODE(dinfo->n_bits); +} + LOCAL(void) compress_init (gif_dest_ptr dinfo, int i_bits) -/* Initialize pseudo-compressor */ +/* Initialize LZW compressor */ { /* init all the state variables */ - dinfo->n_bits = i_bits; + dinfo->n_bits = dinfo->init_bits = i_bits; dinfo->maxcode = MAXCODE(dinfo->n_bits); - dinfo->ClearCode = (1 << (i_bits - 1)); + dinfo->ClearCode = ((code_int) 1 << (i_bits - 1)); dinfo->EOFCode = dinfo->ClearCode + 1; - dinfo->code_counter = dinfo->ClearCode + 2; + dinfo->free_code = dinfo->ClearCode + 2; + dinfo->first_byte = TRUE; /* no waiting symbol yet */ /* init output buffering vars */ dinfo->bytesinpkt = 0; dinfo->cur_accum = 0; dinfo->cur_bits = 0; + /* clear hash table */ + clear_hash(dinfo); /* GIF specifies an initial Clear code */ output(dinfo, dinfo->ClearCode); } LOCAL(void) -compress_pixel (gif_dest_ptr dinfo, int c) -/* Accept and "compress" one pixel value. - * The given value must be less than n_bits wide. - */ +compress_byte (gif_dest_ptr dinfo, int c) +/* Accept and compress one 8-bit byte */ { - /* Output the given pixel value as a symbol. */ - output(dinfo, c); - /* Issue Clear codes often enough to keep the reader from ratcheting up - * its symbol size. 
+ register hash_int i; + register hash_int disp; + register hash_entry probe_value; + + if (dinfo->first_byte) { /* need to initialize waiting_code */ + dinfo->waiting_code = c; + dinfo->first_byte = FALSE; + return; + } + + /* Probe hash table to see if a symbol exists for + * waiting_code followed by c. + * If so, replace waiting_code by that symbol and return. */ - if (dinfo->code_counter < dinfo->maxcode) { - dinfo->code_counter++; - } else { - output(dinfo, dinfo->ClearCode); - dinfo->code_counter = dinfo->ClearCode + 2; /* reset the counter */ + i = ((hash_int) c << (MAX_LZW_BITS-8)) + dinfo->waiting_code; + /* i is less than twice 2**MAX_LZW_BITS, therefore less than twice HSIZE */ + if (i >= HSIZE) + i -= HSIZE; + + probe_value = HASH_ENTRY(dinfo->waiting_code, c); + + if (dinfo->hash_code[i] != 0) { /* is first probed slot empty? */ + if (dinfo->hash_value[i] == probe_value) { + dinfo->waiting_code = dinfo->hash_code[i]; + return; + } + if (i == 0) /* secondary hash (after G. Knott) */ + disp = 1; + else + disp = HSIZE - i; + for (;;) { + i -= disp; + if (i < 0) + i += HSIZE; + if (dinfo->hash_code[i] == 0) + break; /* hit empty slot */ + if (dinfo->hash_value[i] == probe_value) { + dinfo->waiting_code = dinfo->hash_code[i]; + return; + } + } } + + /* here when hashtable[i] is an empty slot; desired symbol not in table */ + output(dinfo, dinfo->waiting_code); + if (dinfo->free_code < LZW_TABLE_SIZE) { + dinfo->hash_code[i] = dinfo->free_code++; /* add symbol to hashtable */ + dinfo->hash_value[i] = probe_value; + } else + clear_block(dinfo); + dinfo->waiting_code = c; } @@ -182,6 +277,9 @@ LOCAL(void) compress_term (gif_dest_ptr dinfo) /* Clean up at end */ { + /* Flush out the buffered code */ + if (! 
dinfo->first_byte) + output(dinfo, dinfo->waiting_code); /* Send an EOF code */ output(dinfo, dinfo->EOFCode); /* Flush the bit-packing buffer */ @@ -289,7 +387,7 @@ emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) /* Write Initial Code Size byte */ putc(InitCodeSize, dinfo->pub.output_file); - /* Initialize for "compression" of image data */ + /* Initialize for LZW compression of image data */ compress_init(dinfo, InitCodeSize+1); } @@ -325,7 +423,7 @@ put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, ptr = dest->pub.buffer[0]; for (col = cinfo->output_width; col > 0; col--) { - compress_pixel(dest, GETJSAMPLE(*ptr++)); + compress_byte(dest, GETJSAMPLE(*ptr++)); } } @@ -339,7 +437,7 @@ finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { gif_dest_ptr dest = (gif_dest_ptr) dinfo; - /* Flush "compression" mechanism */ + /* Flush LZW mechanism */ compress_term(dest); /* Write a zero-length data block to end the series */ putc(0, dest->pub.output_file); @@ -393,6 +491,14 @@ jinit_write_gif (j_decompress_ptr cinfo) ((j_common_ptr) cinfo, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION) 1); dest->pub.buffer_height = 1; + /* Allocate space for hash table */ + dest->hash_code = (code_int *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + HSIZE * SIZEOF(code_int)); + dest->hash_value = (hash_entry FAR *) + (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + HSIZE * SIZEOF(hash_entry)); + return (djpeg_dest_ptr) dest; } -- 2.50.1