granicus.if.org Git - libjpeg-turbo/commitdiff
Merge branch 'master' into dev
author DRC <information@libjpeg-turbo.org>
Sat, 17 Mar 2018 05:27:49 +0000 (00:27 -0500)
committer DRC <information@libjpeg-turbo.org>
Sat, 17 Mar 2018 05:27:49 +0000 (00:27 -0500)
1  2 
ChangeLog.md
jmemmgr.c
release/installer.nsi.in
simd/arm/jsimd.c
simd/arm64/jsimd.c
simd/i386/jsimd.c
simd/loongson/jsimd.c
simd/mips/jsimd.c
simd/powerpc/jsimd.c
simd/x86_64/jsimd.c
turbojpeg.c

diff --cc ChangeLog.md
index 3a904a53490d7445583dcefac7f17be4ed0dfcea,d6998d8cc96cd90df1bcf57fc07328a1d28483fc..ccf98a8523528c6a3650ce03d957286b47992faa
@@@ -107,18 -11,9 +107,21 @@@ actual security issues, should they ari
  algorithm that caused incorrect dithering in the output image.  This algorithm
  now produces bitwise-identical results to the unmerged algorithms.
  
 -3. Fixed a build error when building with older MinGW releases (regression
 +12. The SIMD function symbols for x86[-64]/ELF, MIPS/ELF, macOS/x86[-64] (if
 +libjpeg-turbo is built with YASM), and iOS/ARM[64] builds are now private.
 +This prevents those symbols from being exposed in applications or shared
 +libraries that link statically with libjpeg-turbo.
 +
 +13. Added Loongson MMI SIMD implementations of the RGB-to-YCbCr and
 +YCbCr-to-RGB colorspace conversion, 4:2:0 chroma downsampling, 4:2:0 fancy
 +chroma upsampling, integer quantization, and slow integer DCT/IDCT algorithms.
 +When using the slow integer DCT/IDCT, this speeds up the compression of RGB
 +images by approximately 70-100% and the decompression of RGB images by
 +approximately 2-3.5x.
 +
++14. Fixed a build error when building with older MinGW releases (regression
+ caused by 1.5.1[7].)
  
  1.5.3
  =====
diff --cc jmemmgr.c
Simple merge
diff --cc release/installer.nsi.in
index ec03f5e3079380603e0c8d94f3bf566a583ee70c,6fd1a809fc4ed26999b78b69b73dbdea1b80ad4e..02db342a8a7d0f6b6705a89fbf3e8c384b813f0d
@@@ -1,7 -1,7 +1,7 @@@
  !include x64.nsh
  Name "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@"
 -OutFile "@CMAKE_BINARY_DIR@\${BUILDDIR}@INST_NAME@.exe"
 +OutFile "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}@INST_NAME@.exe"
- InstallDir @INST_DIR@
+ InstallDir "@INST_DIR@"
  
  SetCompressor bzip2
  
diff --cc simd/arm/jsimd.c
index 8fcd6e38fd3148d146f46f37de17ad0b73f5bd44,0f0a88731a15cf520af466087ba98225d916adf5..d118bead146d5e4a353381901e7f8d85727299ad
@@@ -100,9 -98,11 +100,11 @@@ parse_proc_cpuinfo(int bufsize
   * FIXME: This code is racy under a multi-threaded environment.
   */
  LOCAL(void)
 -init_simd (void)
 +init_simd(void)
  {
+ #ifndef NO_GETENV
    char *env = NULL;
+ #endif
  #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    int bufsize = 1024; /* an initial guess for the line buffer size limit */
  #endif
diff --cc simd/arm64/jsimd.c
index b7f019d2616749f18d0a65c8524539f914b5e289,e2e7b0086f86aa9370c862c9832644cea7f0d081..54806e58a67e7b9d3aaa6167afaee85c9dfa0511
@@@ -120,9 -119,11 +120,11 @@@ parse_proc_cpuinfo(int bufsize
  
  
  LOCAL(void)
 -init_simd (void)
 +init_simd(void)
  {
+ #ifndef NO_GETENV
    char *env = NULL;
+ #endif
  #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    int bufsize = 1024; /* an initial guess for the line buffer size limit */
  #endif
diff --cc simd/i386/jsimd.c
index 1e223848f0e2d6c4bb5ee1762dece1ded4a2aecc,9e274e2a45ff2ea41023cbc63065ac371001f85a..d25153194447865cf40a6299be3e37cdb68046e6
@@@ -40,9 -39,11 +40,11 @@@ static unsigned int simd_huffman = 1
   * FIXME: This code is racy under a multi-threaded environment.
   */
  LOCAL(void)
 -init_simd (void)
 +init_simd(void)
  {
+ #ifndef NO_GETENV
    char *env = NULL;
+ #endif
  
    if (simd_support != ~0U)
      return;
diff --cc simd/loongson/jsimd.c
index 0f5c0e809c10a70ad188563fd42d88bb500b2b85,0000000000000000000000000000000000000000..132798a21cccabc68b4818e781c8794a28b1ed49
mode 100644,000000..100644
--- /dev/null
@@@ -1,579 -1,0 +1,583 @@@
-  * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
 +/*
 + * jsimd_loongson.c
 + *
 + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
++ * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
 + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
 + * Copyright (C) 2015, Matthieu Darbois.
 + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
 + *
 + * Based on the x86 SIMD extension for IJG JPEG library,
 + * Copyright (C) 1999-2006, MIYASAKA Masaru.
 + * For conditions of distribution and use, see copyright notice in jsimdext.inc
 + *
 + * This file contains the interface between the "normal" portions
 + * of the library and the SIMD implementations when running on a
 + * Loongson architecture.
 + */
 +
 +#define JPEG_INTERNALS
 +#include "../../jinclude.h"
 +#include "../../jpeglib.h"
 +#include "../../jsimd.h"
 +#include "../../jdct.h"
 +#include "../../jsimddct.h"
 +#include "../jsimd.h"
 +
 +static unsigned int simd_support = ~0;
 +
 +/*
 + * Check what SIMD accelerations are supported.
 + *
 + * FIXME: This code is racy under a multi-threaded environment.
 + */
 +LOCAL(void)
 +init_simd(void)
 +{
++#ifndef NO_GETENV
 +  char *env = NULL;
++#endif
 +
 +  if (simd_support != ~0U)
 +    return;
 +
 +  simd_support |= JSIMD_MMI;
 +
++#ifndef NO_GETENV
 +  /* Force different settings through environment variables */
 +  env = getenv("JSIMD_FORCENONE");
 +  if ((env != NULL) && (strcmp(env, "1") == 0))
 +    simd_support = 0;
++#endif
 +}
 +
 +GLOBAL(int)
 +jsimd_can_rgb_ycc(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if (simd_support & JSIMD_MMI)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_rgb_gray(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_ycc_rgb(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if (simd_support & JSIMD_MMI)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_ycc_rgb565(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_c_can_null_convert(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                      JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                      int num_rows)
 +{
 +  void (*mmifct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 +
 +  switch (cinfo->in_color_space) {
 +  case JCS_EXT_RGB:
 +    mmifct = jsimd_extrgb_ycc_convert_mmi;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    mmifct = jsimd_extrgbx_ycc_convert_mmi;
 +    break;
 +  case JCS_EXT_BGR:
 +    mmifct = jsimd_extbgr_ycc_convert_mmi;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    mmifct = jsimd_extbgrx_ycc_convert_mmi;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    mmifct = jsimd_extxbgr_ycc_convert_mmi;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    mmifct = jsimd_extxrgb_ycc_convert_mmi;
 +    break;
 +  default:
 +    mmifct = jsimd_rgb_ycc_convert_mmi;
 +    break;
 +  }
 +
 +  mmifct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 +}
 +
 +GLOBAL(void)
 +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                       JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                       int num_rows)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                      JDIMENSION input_row, JSAMPARRAY output_buf,
 +                      int num_rows)
 +{
 +  void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 +
 +  switch (cinfo->out_color_space) {
 +  case JCS_EXT_RGB:
 +    mmifct = jsimd_ycc_extrgb_convert_mmi;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    mmifct = jsimd_ycc_extrgbx_convert_mmi;
 +    break;
 +  case JCS_EXT_BGR:
 +    mmifct = jsimd_ycc_extbgr_convert_mmi;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    mmifct = jsimd_ycc_extbgrx_convert_mmi;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    mmifct = jsimd_ycc_extxbgr_convert_mmi;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    mmifct = jsimd_ycc_extxrgb_convert_mmi;
 +    break;
 +  default:
 +    mmifct = jsimd_ycc_rgb_convert_mmi;
 +    break;
 +  }
 +
 +  mmifct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
 +}
 +
 +GLOBAL(void)
 +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                         JDIMENSION input_row, JSAMPARRAY output_buf,
 +                         int num_rows)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                     JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                     int num_rows)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_downsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_MMI)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_smooth_downsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_downsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 +                      JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +  jsimd_h2v2_downsample_mmi(cinfo->image_width, cinfo->max_v_samp_factor,
 +                            compptr->v_samp_factor, compptr->width_in_blocks,
 +                            input_data, output_data);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
 +                             jpeg_component_info *compptr,
 +                             JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 +                      JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_upsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_upsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_int_upsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                   JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_fancy_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_MMI)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_fancy_upsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  jsimd_h2v2_fancy_upsample_mmi(cinfo->max_v_samp_factor,
 +                                compptr->downsampled_width, input_data,
 +                                output_data_ptr);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_merged_upsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_merged_upsample(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_convsamp(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_convsamp_float(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
 +               DCTELEM *workspace)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
 +                     FAST_FLOAT *workspace)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_islow(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_MMI)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_ifast(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_float(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_fdct_islow(DCTELEM *data)
 +{
 +  jsimd_fdct_islow_mmi(data);
 +}
 +
 +GLOBAL(void)
 +jsimd_fdct_ifast(DCTELEM *data)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_fdct_float(FAST_FLOAT *data)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_quantize(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_MMI)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_quantize_float(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 +{
 +  jsimd_quantize_mmi(coef_block, divisors, workspace);
 +}
 +
 +GLOBAL(void)
 +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
 +                     FAST_FLOAT *workspace)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_2x2(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_4x4(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_6x6(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_12x12(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_islow(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_MMI)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_ifast(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_float(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +  jsimd_idct_islow_mmi(compptr->dct_table, coef_block, output_buf, output_col);
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_huff_encode_one_block(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(JOCTET *)
 +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
 +                            int last_dc_val, c_derived_tbl *dctbl,
 +                            c_derived_tbl *actbl)
 +{
 +  return NULL;
 +}
diff --cc simd/mips/jsimd.c
index a9c7ebde264a5969519527ea02495f0849477610,0000000000000000000000000000000000000000..1ff105307578495de996560a189133556fb4e787
mode 100644,000000..100644
--- /dev/null
@@@ -1,1084 -1,0 +1,1088 @@@
-  * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
 +/*
 + * jsimd_mips.c
 + *
 + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
++ * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
 + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
 + * Copyright (C) 2015, Matthieu Darbois.
 + *
 + * Based on the x86 SIMD extension for IJG JPEG library,
 + * Copyright (C) 1999-2006, MIYASAKA Masaru.
 + * For conditions of distribution and use, see copyright notice in jsimdext.inc
 + *
 + * This file contains the interface between the "normal" portions
 + * of the library and the SIMD implementations when running on a
 + * MIPS architecture.
 + */
 +
 +#define JPEG_INTERNALS
 +#include "../../jinclude.h"
 +#include "../../jpeglib.h"
 +#include "../../jsimd.h"
 +#include "../../jdct.h"
 +#include "../../jsimddct.h"
 +#include "../jsimd.h"
 +
 +#include <stdio.h>
 +#include <string.h>
 +#include <ctype.h>
 +
 +static unsigned int simd_support = ~0;
 +
 +#if defined(__linux__)
 +/* Look for search_string in /proc/cpuinfo; returns 1 if found, else 0. */
 +LOCAL(int)
 +parse_proc_cpuinfo(const char *search_string)
 +{
 +  const char *file_name = "/proc/cpuinfo";
 +  char cpuinfo_line[256];
 +  FILE *f = NULL;
 +
 +  simd_support = 0;             /* side effect: discards the initial ~0 */
 +
 +  if ((f = fopen(file_name, "r")) != NULL) {
 +    while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) {
 +      if (strstr(cpuinfo_line, search_string) != NULL) {
 +        fclose(f);
 +        simd_support |= JSIMD_DSPR2;
 +        return 1;               /* substring match: DSPR2 bit now set */
 +      }
 +    }
 +    fclose(f);
 +  }
 +  /* No line contained search_string, or /proc/cpuinfo could not be opened. */
 +  return 0;
 +}
 +
 +#endif
 +
 +/*
 + * Probe SIMD support once; simd_support == ~0 means "not probed yet".
 + * JSIMD_FORCEDSPR2=1 / JSIMD_FORCENONE=1 can override the probe result.
 + * FIXME: This code is racy under a multi-threaded environment.
 + */
 +LOCAL(void)
 +init_simd(void)
 +{
++#ifndef NO_GETENV
 +  char *env = NULL;
++#endif
 +
 +  if (simd_support != ~0U)
 +    return;                     /* already probed */
 +
 +  simd_support = 0;
 +
 +#if defined(__MIPSEL__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
 +  simd_support |= JSIMD_DSPR2;
 +#elif defined(__linux__)
 +  /* We still have a chance to use MIPS DSPR2 regardless of globally used
 +   * -mdspr2 options passed to gcc by performing runtime detection via
 +   * /proc/cpuinfo parsing on linux */
 +  if (!parse_proc_cpuinfo("MIPS 74K"))
 +    return;     /* NOTE(review): this also skips the env overrides below */
 +#endif
 +
++#ifndef NO_GETENV
 +  /* Force different settings through environment variables */
 +  env = getenv("JSIMD_FORCEDSPR2");
 +  if ((env != NULL) && (strcmp(env, "1") == 0))
 +    simd_support = JSIMD_DSPR2;
 +  env = getenv("JSIMD_FORCENONE");       /* checked last, so it wins */
 +  if ((env != NULL) && (strcmp(env, "1") == 0))
 +    simd_support = 0;
++#endif
 +}
 +
 +static const int mips_idct_ifast_coefs[4] = {
 +  0x45404540,           /* FIX( 1.082392200 / 2) =  17734 = 0x4546 */
 +  0x5A805A80,           /* FIX( 1.414213562 / 2) =  23170 = 0x5A82 */
 +  0x76407640,           /* FIX( 1.847759065 / 2) =  30274 = 0x7642 */
 +  0xAC60AC60            /* FIX(-2.613125930 / 4) = -21407 = 0xAC61 */
 +};
 +
 +/* The following struct is borrowed from jdsample.c */
 +typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
 +                               jpeg_component_info *compptr,
 +                               JSAMPARRAY input_data,
 +                               JSAMPARRAY *output_data_ptr);
 +typedef struct {
 +  struct jpeg_upsampler pub;
 +  JSAMPARRAY color_buf[MAX_COMPONENTS];
 +  upsample1_ptr methods[MAX_COMPONENTS];
 +  int next_row_out;
 +  JDIMENSION rows_to_go;
 +  int rowgroup_height[MAX_COMPONENTS];
 +  UINT8 h_expand[MAX_COMPONENTS];
 +  UINT8 v_expand[MAX_COMPONENTS];
 +} my_upsampler;
 +
 +typedef my_upsampler *my_upsample_ptr;
 +
 +GLOBAL(int)
 +jsimd_can_rgb_ycc(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_rgb_gray(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_ycc_rgb(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_ycc_rgb565(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_c_can_null_convert(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                      JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                      int num_rows)
 +{
 +  void (*dspr2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 +
 +  switch (cinfo->in_color_space) {
 +  case JCS_EXT_RGB:
 +    dspr2fct = jsimd_extrgb_ycc_convert_dspr2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    dspr2fct = jsimd_extrgbx_ycc_convert_dspr2;
 +    break;
 +  case JCS_EXT_BGR:
 +    dspr2fct = jsimd_extbgr_ycc_convert_dspr2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    dspr2fct = jsimd_extbgrx_ycc_convert_dspr2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    dspr2fct = jsimd_extxbgr_ycc_convert_dspr2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    dspr2fct = jsimd_extxrgb_ycc_convert_dspr2;
 +    break;
 +  default:
 +    dspr2fct = jsimd_extrgb_ycc_convert_dspr2;
 +    break;
 +  }
 +
 +  dspr2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 +}
 +
 +GLOBAL(void)
 +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                       JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                       int num_rows)
 +{
 +  void (*dspr2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 +
 +  switch (cinfo->in_color_space) {
 +  case JCS_EXT_RGB:
 +    dspr2fct = jsimd_extrgb_gray_convert_dspr2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    dspr2fct = jsimd_extrgbx_gray_convert_dspr2;
 +    break;
 +  case JCS_EXT_BGR:
 +    dspr2fct = jsimd_extbgr_gray_convert_dspr2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    dspr2fct = jsimd_extbgrx_gray_convert_dspr2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    dspr2fct = jsimd_extxbgr_gray_convert_dspr2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    dspr2fct = jsimd_extxrgb_gray_convert_dspr2;
 +    break;
 +  default:
 +    dspr2fct = jsimd_extrgb_gray_convert_dspr2;
 +    break;
 +  }
 +
 +  dspr2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 +}
 +
 +GLOBAL(void)
 +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                      JDIMENSION input_row, JSAMPARRAY output_buf,
 +                      int num_rows)
 +{
 +  void (*dspr2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 +
 +  switch (cinfo->out_color_space) {
 +  case JCS_EXT_RGB:
 +    dspr2fct = jsimd_ycc_extrgb_convert_dspr2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    dspr2fct = jsimd_ycc_extrgbx_convert_dspr2;
 +    break;
 +  case JCS_EXT_BGR:
 +    dspr2fct = jsimd_ycc_extbgr_convert_dspr2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    dspr2fct = jsimd_ycc_extbgrx_convert_dspr2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    dspr2fct = jsimd_ycc_extxbgr_convert_dspr2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    dspr2fct = jsimd_ycc_extxrgb_convert_dspr2;
 +    break;
 +  default:
 +    dspr2fct = jsimd_ycc_extrgb_convert_dspr2;
 +    break;
 +  }
 +
 +  dspr2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
 +}
 +
 +GLOBAL(void)
 +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                         JDIMENSION input_row, JSAMPARRAY output_buf,
 +                         int num_rows)
 +{
 +}
 +
 +GLOBAL(void)
 +jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                     JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                     int num_rows)
 +{
 +  jsimd_c_null_convert_dspr2(cinfo->image_width, input_buf, output_buf,
 +                             output_row, num_rows, cinfo->num_components);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_downsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_smooth_downsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (DCTSIZE != 8)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_downsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 +                      JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +  jsimd_h2v2_downsample_dspr2(cinfo->image_width, cinfo->max_v_samp_factor,
 +                              compptr->v_samp_factor, compptr->width_in_blocks,
 +                              input_data, output_data);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
 +                             jpeg_component_info *compptr,
 +                             JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +  jsimd_h2v2_smooth_downsample_dspr2(input_data, output_data,
 +                                     compptr->v_samp_factor,
 +                                     cinfo->max_v_samp_factor,
 +                                     cinfo->smoothing_factor,
 +                                     compptr->width_in_blocks,
 +                                     cinfo->image_width);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 +                      JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +  jsimd_h2v1_downsample_dspr2(cinfo->image_width, cinfo->max_v_samp_factor,
 +                              compptr->v_samp_factor, compptr->width_in_blocks,
 +                              input_data, output_data);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_int_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  jsimd_h2v2_upsample_dspr2(cinfo->max_v_samp_factor, cinfo->output_width,
 +                            input_data, output_data_ptr);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  jsimd_h2v1_upsample_dspr2(cinfo->max_v_samp_factor, cinfo->output_width,
 +                            input_data, output_data_ptr);
 +}
 +
 +GLOBAL(void)
 +jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                   JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
 +
 +  jsimd_int_upsample_dspr2(upsample->h_expand[compptr->component_index],
 +                           upsample->v_expand[compptr->component_index],
 +                           input_data, output_data_ptr, cinfo->output_width,
 +                           cinfo->max_v_samp_factor);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_fancy_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_fancy_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  jsimd_h2v2_fancy_upsample_dspr2(cinfo->max_v_samp_factor,
 +                                  compptr->downsampled_width, input_data,
 +                                  output_data_ptr);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  jsimd_h2v1_fancy_upsample_dspr2(cinfo->max_v_samp_factor,
 +                                  compptr->downsampled_width, input_data,
 +                                  output_data_ptr);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_merged_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_merged_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
 +{
 +  void (*dspr2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, JSAMPLE *);
 +
 +  switch (cinfo->out_color_space) {
 +  case JCS_EXT_RGB:
 +    dspr2fct = jsimd_h2v2_extrgb_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    dspr2fct = jsimd_h2v2_extrgbx_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_BGR:
 +    dspr2fct = jsimd_h2v2_extbgr_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    dspr2fct = jsimd_h2v2_extbgrx_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    dspr2fct = jsimd_h2v2_extxbgr_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    dspr2fct = jsimd_h2v2_extxrgb_merged_upsample_dspr2;
 +    break;
 +  default:
 +    dspr2fct = jsimd_h2v2_extrgb_merged_upsample_dspr2;
 +    break;
 +  }
 +
 +  dspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf,
 +           cinfo->sample_range_limit);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
 +{
 +  void (*dspr2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, JSAMPLE *);
 +
 +  switch (cinfo->out_color_space) {
 +  case JCS_EXT_RGB:
 +    dspr2fct = jsimd_h2v1_extrgb_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    dspr2fct = jsimd_h2v1_extrgbx_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_BGR:
 +    dspr2fct = jsimd_h2v1_extbgr_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    dspr2fct = jsimd_h2v1_extbgrx_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    dspr2fct = jsimd_h2v1_extxbgr_merged_upsample_dspr2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    dspr2fct = jsimd_h2v1_extxrgb_merged_upsample_dspr2;
 +    break;
 +  default:
 +    dspr2fct = jsimd_h2v1_extrgb_merged_upsample_dspr2;
 +    break;
 +  }
 +
 +  dspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf,
 +           cinfo->sample_range_limit);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_convsamp(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_convsamp_float(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +/* Forwards all three arguments, unchanged, to jsimd_convsamp_dspr2(). */
 +GLOBAL(void)
 +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
 +               DCTELEM *workspace)
 +{
 +  jsimd_convsamp_dspr2(sample_data, start_col, workspace);
 +}
 +
 +/* Forwards all three arguments, unchanged, to jsimd_convsamp_float_dspr2(). */
 +GLOBAL(void)
 +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
 +                     FAST_FLOAT *workspace)
 +{
 +  jsimd_convsamp_float_dspr2(sample_data, start_col, workspace);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_islow(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_ifast(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +/* Always returns 0 in this file. */
 +GLOBAL(int)
 +jsimd_can_fdct_float(void)
 +{
 +  return 0;
 +}
 +
 +/* Forwards the data pointer, unchanged, to jsimd_fdct_islow_dspr2(). */
 +GLOBAL(void)
 +jsimd_fdct_islow(DCTELEM *data)
 +{
 +  jsimd_fdct_islow_dspr2(data);
 +}
 +
 +/* Forwards the data pointer, unchanged, to jsimd_fdct_ifast_dspr2(). */
 +GLOBAL(void)
 +jsimd_fdct_ifast(DCTELEM *data)
 +{
 +  jsimd_fdct_ifast_dspr2(data);
 +}
 +
 +/*
 + * Empty stub: does nothing with data.  jsimd_can_fdct_float() in this file
 + * always returns 0.
 + */
 +GLOBAL(void)
 +jsimd_fdct_float(FAST_FLOAT *data)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_quantize(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_quantize_float(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +/* Forwards all three arguments, unchanged, to jsimd_quantize_dspr2(). */
 +GLOBAL(void)
 +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 +{
 +  jsimd_quantize_dspr2(coef_block, divisors, workspace);
 +}
 +
 +/* Forwards all three arguments, unchanged, to jsimd_quantize_float_dspr2(). */
 +GLOBAL(void)
 +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
 +                     FAST_FLOAT *workspace)
 +{
 +  jsimd_quantize_float_dspr2(coef_block, divisors, workspace);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_2x2(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_4x4(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_6x6(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_12x12(void)
 +{
 +  init_simd();
 +
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +/*
 + * Forwards to jsimd_idct_2x2_dspr2() with the component's dct_table, the
 + * coefficient block and the output buffer/column.  cinfo is not used.
 + */
 +GLOBAL(void)
 +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +  jsimd_idct_2x2_dspr2(compptr->dct_table, coef_block, output_buf, output_col);
 +}
 +
 +/*
 + * Forwards to jsimd_idct_4x4_dspr2() with the component's dct_table, the
 + * coefficient block, the output buffer/column and a stack scratch array of
 + * DCTSIZE * 4 ints.  cinfo is not used.
 + */
 +GLOBAL(void)
 +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +  int workspace[DCTSIZE * 4]; /* buffers data between passes */
 +
 +  jsimd_idct_4x4_dspr2(compptr->dct_table, coef_block, output_buf, output_col,
 +                       workspace);
 +}
 +
 +/*
 + * Forwards to jsimd_idct_6x6_dspr2() with the component's dct_table, the
 + * coefficient block and the output buffer/column.  cinfo is not used.
 + */
 +GLOBAL(void)
 +jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +  jsimd_idct_6x6_dspr2(compptr->dct_table, coef_block, output_buf, output_col);
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +  int workspace[96];
 +  int output[12] = {
 +    (int)(output_buf[0] + output_col),
 +    (int)(output_buf[1] + output_col),
 +    (int)(output_buf[2] + output_col),
 +    (int)(output_buf[3] + output_col),
 +    (int)(output_buf[4] + output_col),
 +    (int)(output_buf[5] + output_col),
 +    (int)(output_buf[6] + output_col),
 +    (int)(output_buf[7] + output_col),
 +    (int)(output_buf[8] + output_col),
 +    (int)(output_buf[9] + output_col),
 +    (int)(output_buf[10] + output_col),
 +    (int)(output_buf[11] + output_col)
 +  };
 +
 +  jsimd_idct_12x12_pass1_dspr2(coef_block, compptr->dct_table, workspace);
 +  jsimd_idct_12x12_pass2_dspr2(workspace, output);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_islow(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_ifast(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(IFAST_MULT_TYPE) != 2)
 +    return 0;
 +  if (IFAST_SCALE_BITS != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_DSPR2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +/* Always returns 0 in this file. */
 +GLOBAL(int)
 +jsimd_can_idct_float(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +  int output[8] = {
 +    (int)(output_buf[0] + output_col),
 +    (int)(output_buf[1] + output_col),
 +    (int)(output_buf[2] + output_col),
 +    (int)(output_buf[3] + output_col),
 +    (int)(output_buf[4] + output_col),
 +    (int)(output_buf[5] + output_col),
 +    (int)(output_buf[6] + output_col),
 +    (int)(output_buf[7] + output_col)
 +  };
 +
 +  jsimd_idct_islow_dspr2(coef_block, compptr->dct_table, output,
 +                         IDCT_range_limit(cinfo));
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +  JCOEFPTR inptr;
 +  IFAST_MULT_TYPE *quantptr;
 +  DCTELEM workspace[DCTSIZE2];  /* buffers data between passes */
 +
 +  /* Pass 1: process columns from input, store into work array. */
 +
 +  inptr = coef_block;
 +  quantptr = (IFAST_MULT_TYPE *)compptr->dct_table;
 +
 +  jsimd_idct_ifast_cols_dspr2(inptr, quantptr, workspace,
 +                              mips_idct_ifast_coefs);
 +
 +  /* Pass 2: process rows from work array, store into output array. */
 +  /* Note that we must descale the results by a factor of 8 == 2**3, */
 +  /* and also undo the PASS1_BITS scaling. */
 +
 +  jsimd_idct_ifast_rows_dspr2(workspace, output_buf, output_col,
 +                              mips_idct_ifast_coefs);
 +}
 +
 +/*
 + * Empty stub: ignores every argument.  jsimd_can_idct_float() in this file
 + * always returns 0.
 + */
 +GLOBAL(void)
 +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +}
 +
 +/* Always returns 0 in this file. */
 +GLOBAL(int)
 +jsimd_can_huff_encode_one_block(void)
 +{
 +  return 0;
 +}
 +
 +/*
 + * Stub: ignores every argument and returns NULL.
 + * jsimd_can_huff_encode_one_block() in this file always returns 0.
 + */
 +GLOBAL(JOCTET *)
 +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
 +                            int last_dc_val, c_derived_tbl *dctbl,
 +                            c_derived_tbl *actbl)
 +{
 +  return NULL;
 +}
index 53f1a7d8c421542bfb7e6ae2f707333570bacefb,9acde065b2240694db62b6cf6fd167c2f4480cdc..84d58e51b12d1d590a579e66e8213a837fc56723
@@@ -109,9 -107,11 +109,11 @@@ parse_proc_cpuinfo(int bufsize
   * FIXME: This code is racy under a multi-threaded environment.
   */
  LOCAL(void)
 -init_simd (void)
 +init_simd(void)
  {
+ #ifndef NO_GETENV
    char *env = NULL;
+ #endif
  #if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
    int bufsize = 1024; /* an initial guess for the line buffer size limit */
  #elif defined(__amigaos4__)
index 03a4da32582e295f97f6fcf22aa44e8236b98057,0000000000000000000000000000000000000000..4125248a0f680395762db075b83c75fcb9334547
mode 100644,000000..100644
--- /dev/null
@@@ -1,1018 -1,0 +1,1022 @@@
 +/*
 + * jsimd_x86_64.c
 + *
 + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 + * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
 + * Copyright (C) 2015, Matthieu Darbois.
 + *
 + * Based on the x86 SIMD extension for IJG JPEG library,
 + * Copyright (C) 1999-2006, MIYASAKA Masaru.
 + * For conditions of distribution and use, see copyright notice in jsimdext.inc
 + *
 + * This file contains the interface between the "normal" portions
 + * of the library and the SIMD implementations when running on a
 + * 64-bit x86 architecture.
 + */
 +
 +#define JPEG_INTERNALS
 +#include "../../jinclude.h"
 +#include "../../jpeglib.h"
 +#include "../../jsimd.h"
 +#include "../../jdct.h"
 +#include "../../jsimddct.h"
 +#include "../jsimd.h"
 +
 +/*
 + * In the PIC cases, we have no guarantee that constants will keep
 + * their alignment. This macro allows us to verify it at runtime.
 + */
 +#define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0)
 +
 +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
 +#define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
 +
 +/* Bitmask of usable SIMD features; ~0 means init_simd() has not run yet. */
 +static unsigned int simd_support = ~0;
 +/* Set to 0 by init_simd() when JSIMD_NOHUFFENC=1 is in the environment. */
 +static unsigned int simd_huffman = 1;
 +
 +/*
 + * Check what SIMD accelerations are supported.
 + *
 + * Detection runs once: simd_support starts out as ~0, and every later call
 + * returns immediately once it holds any other value.  Unless NO_GETENV is
 + * defined, the environment variables read below (honoured only when set to
 + * exactly "1") can then narrow or disable what was detected.
 + *
 + * FIXME: This code is racy under a multi-threaded environment.
 + */
 +LOCAL(void)
 +init_simd(void)
 +{
 ++#ifndef NO_GETENV
 +  char *env = NULL;
 ++#endif
 +
 +  /* ~0U is the "not probed yet" value; anything else means we already ran. */
 +  if (simd_support != ~0U)
 +    return;
 +
 +  simd_support = jpeg_simd_cpu_support();
 +
 ++#ifndef NO_GETENV
 +  /* Force different settings through environment variables */
 +  env = getenv("JSIMD_FORCESSE2");
 +  if ((env != NULL) && (strcmp(env, "1") == 0))
 +    simd_support &= JSIMD_SSE2;   /* keep only the SSE2 bit, if detected */
 +  env = getenv("JSIMD_FORCEAVX2");
 +  if ((env != NULL) && (strcmp(env, "1") == 0))
 +    simd_support &= JSIMD_AVX2;   /* keep only the AVX2 bit, if detected */
 +  env = getenv("JSIMD_FORCENONE");
 +  if ((env != NULL) && (strcmp(env, "1") == 0))
 +    simd_support = 0;             /* clear every feature bit */
 +  env = getenv("JSIMD_NOHUFFENC");
 +  if ((env != NULL) && (strcmp(env, "1") == 0))
 +    simd_huffman = 0;
 ++#endif
 +}
 +
 +GLOBAL(int)
 +jsimd_can_rgb_ycc(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) &&
 +      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) &&
 +      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_rgb_gray(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) &&
 +      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) &&
 +      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_ycc_rgb(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) &&
 +      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) &&
 +      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +/* Always returns 0 in this file. */
 +GLOBAL(int)
 +jsimd_can_ycc_rgb565(void)
 +{
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                      JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                      int num_rows)
 +{
 +  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 +  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 +
 +  switch (cinfo->in_color_space) {
 +  case JCS_EXT_RGB:
 +    avx2fct = jsimd_extrgb_ycc_convert_avx2;
 +    sse2fct = jsimd_extrgb_ycc_convert_sse2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
 +    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
 +    break;
 +  case JCS_EXT_BGR:
 +    avx2fct = jsimd_extbgr_ycc_convert_avx2;
 +    sse2fct = jsimd_extbgr_ycc_convert_sse2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
 +    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
 +    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
 +    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
 +    break;
 +  default:
 +    avx2fct = jsimd_rgb_ycc_convert_avx2;
 +    sse2fct = jsimd_rgb_ycc_convert_sse2;
 +    break;
 +  }
 +
 +  if (simd_support & JSIMD_AVX2)
 +    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 +  else
 +    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 +}
 +
 +GLOBAL(void)
 +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 +                       JSAMPIMAGE output_buf, JDIMENSION output_row,
 +                       int num_rows)
 +{
 +  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 +  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 +
 +  switch (cinfo->in_color_space) {
 +  case JCS_EXT_RGB:
 +    avx2fct = jsimd_extrgb_gray_convert_avx2;
 +    sse2fct = jsimd_extrgb_gray_convert_sse2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    avx2fct = jsimd_extrgbx_gray_convert_avx2;
 +    sse2fct = jsimd_extrgbx_gray_convert_sse2;
 +    break;
 +  case JCS_EXT_BGR:
 +    avx2fct = jsimd_extbgr_gray_convert_avx2;
 +    sse2fct = jsimd_extbgr_gray_convert_sse2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    avx2fct = jsimd_extbgrx_gray_convert_avx2;
 +    sse2fct = jsimd_extbgrx_gray_convert_sse2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    avx2fct = jsimd_extxbgr_gray_convert_avx2;
 +    sse2fct = jsimd_extxbgr_gray_convert_sse2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    avx2fct = jsimd_extxrgb_gray_convert_avx2;
 +    sse2fct = jsimd_extxrgb_gray_convert_sse2;
 +    break;
 +  default:
 +    avx2fct = jsimd_rgb_gray_convert_avx2;
 +    sse2fct = jsimd_rgb_gray_convert_sse2;
 +    break;
 +  }
 +
 +  if (simd_support & JSIMD_AVX2)
 +    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 +  else
 +    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 +}
 +
 +GLOBAL(void)
 +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                      JDIMENSION input_row, JSAMPARRAY output_buf,
 +                      int num_rows)
 +{
 +  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 +  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 +
 +  switch (cinfo->out_color_space) {
 +  case JCS_EXT_RGB:
 +    avx2fct = jsimd_ycc_extrgb_convert_avx2;
 +    sse2fct = jsimd_ycc_extrgb_convert_sse2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
 +    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
 +    break;
 +  case JCS_EXT_BGR:
 +    avx2fct = jsimd_ycc_extbgr_convert_avx2;
 +    sse2fct = jsimd_ycc_extbgr_convert_sse2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
 +    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
 +    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
 +    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
 +    break;
 +  default:
 +    avx2fct = jsimd_ycc_rgb_convert_avx2;
 +    sse2fct = jsimd_ycc_rgb_convert_sse2;
 +    break;
 +  }
 +
 +  if (simd_support & JSIMD_AVX2)
 +    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
 +  else
 +    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
 +}
 +
 +/*
 + * Empty stub: ignores every argument.  jsimd_can_ycc_rgb565() in this file
 + * always returns 0.
 + */
 +GLOBAL(void)
 +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                         JDIMENSION input_row, JSAMPARRAY output_buf,
 +                         int num_rows)
 +{
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_downsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_AVX2)
 +    return 1;
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_downsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_AVX2)
 +    return 1;
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 +                      JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
 +                               compptr->v_samp_factor,
 +                               compptr->width_in_blocks, input_data,
 +                               output_data);
 +  else
 +    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
 +                               compptr->v_samp_factor,
 +                               compptr->width_in_blocks, input_data,
 +                               output_data);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 +                      JSAMPARRAY input_data, JSAMPARRAY output_data)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
 +                               compptr->v_samp_factor,
 +                               compptr->width_in_blocks, input_data,
 +                               output_data);
 +  else
 +    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
 +                               compptr->v_samp_factor,
 +                               compptr->width_in_blocks, input_data,
 +                               output_data);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_AVX2)
 +    return 1;
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_AVX2)
 +    return 1;
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
 +                             input_data, output_data_ptr);
 +  else
 +    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
 +                             input_data, output_data_ptr);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
 +                             input_data, output_data_ptr);
 +  else
 +    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
 +                             input_data, output_data_ptr);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_fancy_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) &&
 +      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) &&
 +      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_fancy_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) &&
 +      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) &&
 +      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
 +                                   compptr->downsampled_width, input_data,
 +                                   output_data_ptr);
 +  else
 +    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
 +                                   compptr->downsampled_width, input_data,
 +                                   output_data_ptr);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
 +                                   compptr->downsampled_width, input_data,
 +                                   output_data_ptr);
 +  else
 +    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
 +                                   compptr->downsampled_width, input_data,
 +                                   output_data_ptr);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v2_merged_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) &&
 +      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) &&
 +      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_h2v1_merged_upsample(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) &&
 +      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) &&
 +      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
 +{
 +  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 +  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 +
 +  switch (cinfo->out_color_space) {
 +  case JCS_EXT_RGB:
 +    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_BGR:
 +    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
 +    break;
 +  default:
 +    avx2fct = jsimd_h2v2_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v2_merged_upsample_sse2;
 +    break;
 +  }
 +
 +  if (simd_support & JSIMD_AVX2)
 +    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
 +  else
 +    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
 +}
 +
 +GLOBAL(void)
 +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 +                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
 +{
 +  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 +  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 +
 +  switch (cinfo->out_color_space) {
 +  case JCS_EXT_RGB:
 +    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_RGBX:
 +  case JCS_EXT_RGBA:
 +    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_BGR:
 +    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_BGRX:
 +  case JCS_EXT_BGRA:
 +    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_XBGR:
 +  case JCS_EXT_ABGR:
 +    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
 +    break;
 +  case JCS_EXT_XRGB:
 +  case JCS_EXT_ARGB:
 +    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
 +    break;
 +  default:
 +    avx2fct = jsimd_h2v1_merged_upsample_avx2;
 +    sse2fct = jsimd_h2v1_merged_upsample_sse2;
 +    break;
 +  }
 +
 +  if (simd_support & JSIMD_AVX2)
 +    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
 +  else
 +    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_convsamp(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_AVX2)
 +    return 1;
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_convsamp_float(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(FAST_FLOAT) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
 +               DCTELEM *workspace)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_convsamp_avx2(sample_data, start_col, workspace);
 +  else
 +    jsimd_convsamp_sse2(sample_data, start_col, workspace);
 +}
 +
 +GLOBAL(void)
 +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
 +                     FAST_FLOAT *workspace)
 +{
 +  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_islow(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_ifast(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_fdct_float(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(FAST_FLOAT) != 4)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_fdct_islow(DCTELEM *data)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_fdct_islow_avx2(data);
 +  else
 +    jsimd_fdct_islow_sse2(data);
 +}
 +
 +GLOBAL(void)
 +jsimd_fdct_ifast(DCTELEM *data)
 +{
 +  jsimd_fdct_ifast_sse2(data);
 +}
 +
 +GLOBAL(void)
 +jsimd_fdct_float(FAST_FLOAT *data)
 +{
 +  jsimd_fdct_float_sse(data);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_quantize(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (sizeof(DCTELEM) != 2)
 +    return 0;
 +
 +  if (simd_support & JSIMD_AVX2)
 +    return 1;
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_quantize_float(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (sizeof(FAST_FLOAT) != 4)
 +    return 0;
 +
 +  if (simd_support & JSIMD_SSE2)
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_quantize_avx2(coef_block, divisors, workspace);
 +  else
 +    jsimd_quantize_sse2(coef_block, divisors, workspace);
 +}
 +
 +GLOBAL(void)
 +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
 +                     FAST_FLOAT *workspace)
 +{
 +  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_2x2(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_4x4(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +               JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +               JDIMENSION output_col)
 +{
 +  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_islow(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
 +    return 1;
 +  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_ifast(void)
 +{
 +  init_simd();
 +
 +  /* The code is optimised for these values only */
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(IFAST_MULT_TYPE) != 2)
 +    return 0;
 +  if (IFAST_SCALE_BITS != 2)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(int)
 +jsimd_can_idct_float(void)
 +{
 +  init_simd();
 +
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +  if (BITS_IN_JSAMPLE != 8)
 +    return 0;
 +  if (sizeof(JDIMENSION) != 4)
 +    return 0;
 +  if (sizeof(FAST_FLOAT) != 4)
 +    return 0;
 +  if (sizeof(FLOAT_MULT_TYPE) != 4)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +  if (simd_support & JSIMD_AVX2)
 +    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
 +                          output_col);
 +  else
 +    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
 +                          output_col);
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
 +                        output_col);
 +}
 +
 +GLOBAL(void)
 +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 +                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
 +                 JDIMENSION output_col)
 +{
 +  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
 +                        output_col);
 +}
 +
 +GLOBAL(int)
 +jsimd_can_huff_encode_one_block(void)
 +{
 +  init_simd();
 +
 +  if (DCTSIZE != 8)
 +    return 0;
 +  if (sizeof(JCOEF) != 2)
 +    return 0;
 +
 +  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
 +      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
 +    return 1;
 +
 +  return 0;
 +}
 +
 +GLOBAL(JOCTET *)
 +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
 +                            int last_dc_val, c_derived_tbl *dctbl,
 +                            c_derived_tbl *actbl)
 +{
 +  return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
 +                                          dctbl, actbl);
 +}
diff --cc turbojpeg.c
index bb061a20ee58cef33e6a5ff60957e699fef4ae93,df907b09e6c4232a6b85fa2c9fda1249af8deac7..aa7d7a7279e168d0daebabce3616ed17e7c40f3f
@@@ -594,476 -754,504 +594,482 @@@ bailout
  }
  
  
 -DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf,
 -      int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf,
 -      unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)
 +DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
 +                          int width, int pitch, int height, int pixelFormat,
 +                          unsigned char **jpegBuf, unsigned long *jpegSize,
 +                          int jpegSubsamp, int jpegQual, int flags)
  {
 -      int i, retval=0, alloc=1;  JSAMPROW *row_pointer=NULL;
 -      #ifndef JCS_EXTENSIONS
 -      unsigned char *rgbBuf=NULL;
 -      #endif
 -
 -      getcinstance(handle)
 -      if((this->init&COMPRESS)==0)
 -              _throw("tjCompress2(): Instance has not been initialized for compression");
 -
 -      if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
 -              || pixelFormat>=TJ_NUMPF || jpegBuf==NULL || jpegSize==NULL
 -              || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT || jpegQual<0 || jpegQual>100)
 -              _throw("tjCompress2(): Invalid argument");
 -
 -      if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
 -
 -      #ifndef JCS_EXTENSIONS
 -      if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK)
 -      {
 -              rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
 -              if(!rgbBuf) _throw("tjCompress2(): Memory allocation failure");
 -              srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
 -              pitch=width*RGB_PIXELSIZE;
 -      }
 -      #endif
 -
 -      if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL)
 -              _throw("tjCompress2(): Memory allocation failure");
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      cinfo->image_width=width;
 -      cinfo->image_height=height;
 +  int i, retval = 0, alloc = 1;
 +  JSAMPROW *row_pointer = NULL;
 +
 +  getcinstance(handle)
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
 +  if ((this->init & COMPRESS) == 0)
 +    _throw("tjCompress2(): Instance has not been initialized for compression");
 +
 +  if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
 +      pixelFormat < 0 || pixelFormat >= TJ_NUMPF || jpegBuf == NULL ||
 +      jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= NUMSUBOPT ||
 +      jpegQual < 0 || jpegQual > 100)
 +    _throw("tjCompress2(): Invalid argument");
 +
 +  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
 +
 +  if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * height)) == NULL)
 +    _throw("tjCompress2(): Memory allocation failure");
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  cinfo->image_width = width;
 +  cinfo->image_height = height;
  
 -      if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 -      else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 -      else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #ifndef NO_PUTENV
 +  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 +  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 +  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #endif
  
 -      if(flags&TJFLAG_NOREALLOC)
 -      {
 -              alloc=0;  *jpegSize=tjBufSize(width, height, jpegSubsamp);
 -      }
 -      jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
 -      if(setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags)==-1)
 -              return -1;
 -
 -      jpeg_start_compress(cinfo, TRUE);
 -      for(i=0; i<height; i++)
 -      {
 -              if(flags&TJFLAG_BOTTOMUP)
 -                      row_pointer[i]=(JSAMPROW)&srcBuf[(height-i-1)*pitch];
 -              else row_pointer[i]=(JSAMPROW)&srcBuf[i*pitch];
 -      }
 -      while(cinfo->next_scanline<cinfo->image_height)
 -      {
 -              jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline],
 -                      cinfo->image_height-cinfo->next_scanline);
 -      }
 -      jpeg_finish_compress(cinfo);
 -
 -      bailout:
 -      if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
 -      #ifndef JCS_EXTENSIONS
 -      if(rgbBuf) free(rgbBuf);
 -      #endif
 -      if(row_pointer) free(row_pointer);
 -      if(this->jerr.warning) retval=-1;
 -      return retval;
 +  if (flags & TJFLAG_NOREALLOC) {
 +    alloc = 0;  *jpegSize = tjBufSize(width, height, jpegSubsamp);
 +  }
 +  jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
 +  if (setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags) == -1)
 +    return -1;
 +
 +  jpeg_start_compress(cinfo, TRUE);
 +  for (i = 0; i < height; i++) {
 +    if (flags & TJFLAG_BOTTOMUP)
 +      row_pointer[i] = (JSAMPROW)&srcBuf[(height - i - 1) * pitch];
 +    else
 +      row_pointer[i] = (JSAMPROW)&srcBuf[i * pitch];
 +  }
 +  while (cinfo->next_scanline < cinfo->image_height)
 +    jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline],
 +                         cinfo->image_height - cinfo->next_scanline);
 +  jpeg_finish_compress(cinfo);
 +
 +bailout:
 +  if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo);
 +  if (row_pointer) free(row_pointer);
 +  if (this->jerr.warning) retval = -1;
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf,
 -      int width, int pitch, int height, int pixelSize, unsigned char *jpegBuf,
 -      unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)
 +DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width,
 +                         int pitch, int height, int pixelSize,
 +                         unsigned char *jpegBuf, unsigned long *jpegSize,
 +                         int jpegSubsamp, int jpegQual, int flags)
  {
 -      int retval=0;  unsigned long size;
 -      if(flags&TJ_YUV)
 -      {
 -              size=tjBufSizeYUV(width, height, jpegSubsamp);
 -              retval=tjEncodeYUV2(handle, srcBuf, width, pitch, height,
 -                      getPixelFormat(pixelSize, flags), jpegBuf, jpegSubsamp, flags);
 -      }
 -      else
 -      {
 -              retval=tjCompress2(handle, srcBuf, width, pitch, height,
 -                      getPixelFormat(pixelSize, flags), &jpegBuf, &size, jpegSubsamp, jpegQual,
 -                      flags|TJFLAG_NOREALLOC);
 -      }
 -      *jpegSize=size;
 -      return retval;
 +  int retval = 0;
 +  unsigned long size;
 +
 +  if (flags & TJ_YUV) {
 +    size = tjBufSizeYUV(width, height, jpegSubsamp);
 +    retval = tjEncodeYUV2(handle, srcBuf, width, pitch, height,
 +                          getPixelFormat(pixelSize, flags), jpegBuf,
 +                          jpegSubsamp, flags);
 +  } else {
 +    retval = tjCompress2(handle, srcBuf, width, pitch, height,
 +                         getPixelFormat(pixelSize, flags), &jpegBuf, &size,
 +                         jpegSubsamp, jpegQual, flags | TJFLAG_NOREALLOC);
 +  }
 +  *jpegSize = size;
 +  return retval;
  }
  
  
 -DLLEXPORT int DLLCALL tjEncodeYUVPlanes(tjhandle handle,
 -      const unsigned char *srcBuf, int width, int pitch, int height,
 -      int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp,
 -      int flags)
 +DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf,
 +                                int width, int pitch, int height,
 +                                int pixelFormat, unsigned char **dstPlanes,
 +                                int *strides, int subsamp, int flags)
  {
 -      int i, retval=0;  JSAMPROW *row_pointer=NULL;
 -      JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS];
 -      JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS];
 -      JSAMPROW *outbuf[MAX_COMPONENTS];
 -      int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
 -      JSAMPLE *ptr;
 -      jpeg_component_info *compptr;
 -      #ifndef JCS_EXTENSIONS
 -      unsigned char *rgbBuf=NULL;
 -      #endif
 -
 -      getcinstance(handle);
 -
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              tmpbuf[i]=NULL;  _tmpbuf[i]=NULL;
 -              tmpbuf2[i]=NULL;  _tmpbuf2[i]=NULL;  outbuf[i]=NULL;
 -      }
 -
 -      if((this->init&COMPRESS)==0)
 -              _throw("tjEncodeYUVPlanes(): Instance has not been initialized for compression");
 -
 -      if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
 -              || pixelFormat>=TJ_NUMPF || !dstPlanes || !dstPlanes[0] || subsamp<0
 -              || subsamp>=NUMSUBOPT)
 -              _throw("tjEncodeYUVPlanes(): Invalid argument");
 -      if(subsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
 -              _throw("tjEncodeYUVPlanes(): Invalid argument");
 -
 -      if(pixelFormat==TJPF_CMYK)
 -              _throw("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels");
 -
 -      if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
 -
 -      #ifndef JCS_EXTENSIONS
 -      if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK)
 -      {
 -              rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
 -              if(!rgbBuf) _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 -              srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
 -              pitch=width*RGB_PIXELSIZE;
 -      }
 -      #endif
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      cinfo->image_width=width;
 -      cinfo->image_height=height;
 +  JSAMPROW *row_pointer = NULL;
 +  JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS];
 +  JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS];
 +  JSAMPROW *outbuf[MAX_COMPONENTS];
 +  int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
 +  JSAMPLE *ptr;
 +  jpeg_component_info *compptr;
 +
 +  getcinstance(handle);
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
 +
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    tmpbuf[i] = NULL;  _tmpbuf[i] = NULL;
 +    tmpbuf2[i] = NULL;  _tmpbuf2[i] = NULL;  outbuf[i] = NULL;
 +  }
 +
 +  if ((this->init & COMPRESS) == 0)
 +    _throw("tjEncodeYUVPlanes(): Instance has not been initialized for compression");
 +
 +  if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
 +      pixelFormat < 0 || pixelFormat >= TJ_NUMPF || !dstPlanes ||
 +      !dstPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT)
 +    _throw("tjEncodeYUVPlanes(): Invalid argument");
 +  if (subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
 +    _throw("tjEncodeYUVPlanes(): Invalid argument");
 +
 +  if (pixelFormat == TJPF_CMYK)
 +    _throw("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels");
 +
 +  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  cinfo->image_width = width;
 +  cinfo->image_height = height;
  
 -      if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 -      else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 -      else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #ifndef NO_PUTENV
 +  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 +  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 +  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #endif
  
 -      if(setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags)==-1) return -1;
 -
 -      /* Execute only the parts of jpeg_start_compress() that we need.  If we
 -         were to call the whole jpeg_start_compress() function, then it would try
 -         to write the file headers, which could overflow the output buffer if the
 -         YUV image were very small. */
 -      if(cinfo->global_state!=CSTATE_START)
 -              _throw("tjEncodeYUVPlanes(): libjpeg API is in the wrong state");
 -      (*cinfo->err->reset_error_mgr)((j_common_ptr)cinfo);
 -      jinit_c_master_control(cinfo, FALSE);
 -      jinit_color_converter(cinfo);
 -      jinit_downsampler(cinfo);
 -      (*cinfo->cconvert->start_pass)(cinfo);
 -
 -      pw0=PAD(width, cinfo->max_h_samp_factor);
 -      ph0=PAD(height, cinfo->max_v_samp_factor);
 -
 -      if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL)
 -              _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 -      for(i=0; i<height; i++)
 -      {
 -              if(flags&TJFLAG_BOTTOMUP)
 -                      row_pointer[i]=(JSAMPROW)&srcBuf[(height-i-1)*pitch];
 -              else row_pointer[i]=(JSAMPROW)&srcBuf[i*pitch];
 -      }
 -      if(height<ph0)
 -              for(i=height; i<ph0; i++) row_pointer[i]=row_pointer[height-1];
 -
 -      for(i=0; i<cinfo->num_components; i++)
 -      {
 -              compptr=&cinfo->comp_info[i];
 -              _tmpbuf[i]=(JSAMPLE *)malloc(
 -                      PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE)
 -                              /compptr->h_samp_factor, 16) * cinfo->max_v_samp_factor + 16);
 -              if(!_tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 -              tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*cinfo->max_v_samp_factor);
 -              if(!tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 -              for(row=0; row<cinfo->max_v_samp_factor; row++)
 -              {
 -                      unsigned char *_tmpbuf_aligned=
 -                              (unsigned char *)PAD((size_t)_tmpbuf[i], 16);
 -                      tmpbuf[i][row]=&_tmpbuf_aligned[
 -                              PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE)
 -                                      /compptr->h_samp_factor, 16) * row];
 -              }
 -              _tmpbuf2[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16)
 -                      * compptr->v_samp_factor + 16);
 -              if(!_tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 -              tmpbuf2[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor);
 -              if(!tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 -              for(row=0; row<compptr->v_samp_factor; row++)
 -              {
 -                      unsigned char *_tmpbuf2_aligned=
 -                              (unsigned char *)PAD((size_t)_tmpbuf2[i], 16);
 -                      tmpbuf2[i][row]=&_tmpbuf2_aligned[
 -                              PAD(compptr->width_in_blocks*DCTSIZE, 16) * row];
 -              }
 -              pw[i]=pw0*compptr->h_samp_factor/cinfo->max_h_samp_factor;
 -              ph[i]=ph0*compptr->v_samp_factor/cinfo->max_v_samp_factor;
 -              outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]);
 -              if(!outbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 -              ptr=dstPlanes[i];
 -              for(row=0; row<ph[i]; row++)
 -              {
 -                      outbuf[i][row]=ptr;
 -                      ptr+=(strides && strides[i]!=0)? strides[i]:pw[i];
 -              }
 -      }
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      for(row=0; row<ph0; row+=cinfo->max_v_samp_factor)
 -      {
 -              (*cinfo->cconvert->color_convert)(cinfo, &row_pointer[row], tmpbuf, 0,
 -                      cinfo->max_v_samp_factor);
 -              (cinfo->downsample->downsample)(cinfo, tmpbuf, 0, tmpbuf2, 0);
 -              for(i=0, compptr=cinfo->comp_info; i<cinfo->num_components; i++, compptr++)
 -                      jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i],
 -                              row*compptr->v_samp_factor/cinfo->max_v_samp_factor,
 -                              compptr->v_samp_factor, pw[i]);
 -      }
 -      cinfo->next_scanline+=height;
 -      jpeg_abort_compress(cinfo);
 -
 -      bailout:
 -      if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
 -      #ifndef JCS_EXTENSIONS
 -      if(rgbBuf) free(rgbBuf);
 -      #endif
 -      if(row_pointer) free(row_pointer);
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              if(tmpbuf[i]!=NULL) free(tmpbuf[i]);
 -              if(_tmpbuf[i]!=NULL) free(_tmpbuf[i]);
 -              if(tmpbuf2[i]!=NULL) free(tmpbuf2[i]);
 -              if(_tmpbuf2[i]!=NULL) free(_tmpbuf2[i]);
 -              if(outbuf[i]!=NULL) free(outbuf[i]);
 -      }
 -      if(this->jerr.warning) retval=-1;
 -      return retval;
 +  if (setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags) == -1) return -1;
 +
 +  /* Execute only the parts of jpeg_start_compress() that we need.  If we
 +     were to call the whole jpeg_start_compress() function, then it would try
 +     to write the file headers, which could overflow the output buffer if the
 +     YUV image were very small. */
 +  if (cinfo->global_state != CSTATE_START)
 +    _throw("tjEncodeYUVPlanes(): libjpeg API is in the wrong state");
 +  (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
 +  jinit_c_master_control(cinfo, FALSE);
 +  jinit_color_converter(cinfo);
 +  jinit_downsampler(cinfo);
 +  (*cinfo->cconvert->start_pass) (cinfo);
 +
 +  pw0 = PAD(width, cinfo->max_h_samp_factor);
 +  ph0 = PAD(height, cinfo->max_v_samp_factor);
 +
 +  if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL)
 +    _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 +  for (i = 0; i < height; i++) {
 +    if (flags & TJFLAG_BOTTOMUP)
 +      row_pointer[i] = (JSAMPROW)&srcBuf[(height - i - 1) * pitch];
 +    else
 +      row_pointer[i] = (JSAMPROW)&srcBuf[i * pitch];
 +  }
 +  if (height < ph0)
 +    for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1];
 +
 +  for (i = 0; i < cinfo->num_components; i++) {
 +    compptr = &cinfo->comp_info[i];
 +    _tmpbuf[i] = (JSAMPLE *)malloc(
 +      PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) /
 +          compptr->h_samp_factor, 32) *
 +      cinfo->max_v_samp_factor + 32);
 +    if (!_tmpbuf[i])
 +      _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 +    tmpbuf[i] =
 +      (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo->max_v_samp_factor);
 +    if (!tmpbuf[i])
 +      _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 +    for (row = 0; row < cinfo->max_v_samp_factor; row++) {
 +      unsigned char *_tmpbuf_aligned =
 +        (unsigned char *)PAD((size_t)_tmpbuf[i], 32);
 +
 +      tmpbuf[i][row] = &_tmpbuf_aligned[
 +        PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) /
 +            compptr->h_samp_factor, 32) * row];
 +    }
 +    _tmpbuf2[i] =
 +      (JSAMPLE *)malloc(PAD(compptr->width_in_blocks * DCTSIZE, 32) *
 +                        compptr->v_samp_factor + 32);
 +    if (!_tmpbuf2[i])
 +      _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 +    tmpbuf2[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor);
 +    if (!tmpbuf2[i])
 +      _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 +    for (row = 0; row < compptr->v_samp_factor; row++) {
 +      unsigned char *_tmpbuf2_aligned =
 +        (unsigned char *)PAD((size_t)_tmpbuf2[i], 32);
 +
 +      tmpbuf2[i][row] =
 +        &_tmpbuf2_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
 +    }
 +    pw[i] = pw0 * compptr->h_samp_factor / cinfo->max_h_samp_factor;
 +    ph[i] = ph0 * compptr->v_samp_factor / cinfo->max_v_samp_factor;
 +    outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]);
 +    if (!outbuf[i])
 +      _throw("tjEncodeYUVPlanes(): Memory allocation failure");
 +    ptr = dstPlanes[i];
 +    for (row = 0; row < ph[i]; row++) {
 +      outbuf[i][row] = ptr;
 +      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
 +    }
 +  }
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  for (row = 0; row < ph0; row += cinfo->max_v_samp_factor) {
 +    (*cinfo->cconvert->color_convert) (cinfo, &row_pointer[row], tmpbuf, 0,
 +                                       cinfo->max_v_samp_factor);
 +    (cinfo->downsample->downsample) (cinfo, tmpbuf, 0, tmpbuf2, 0);
 +    for (i = 0, compptr = cinfo->comp_info; i < cinfo->num_components;
 +         i++, compptr++)
 +      jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i],
 +        row * compptr->v_samp_factor / cinfo->max_v_samp_factor,
 +        compptr->v_samp_factor, pw[i]);
 +  }
 +  cinfo->next_scanline += height;
 +  jpeg_abort_compress(cinfo);
 +
 +bailout:
 +  if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo);
 +  if (row_pointer) free(row_pointer);
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    if (tmpbuf[i] != NULL) free(tmpbuf[i]);
 +    if (_tmpbuf[i] != NULL) free(_tmpbuf[i]);
 +    if (tmpbuf2[i] != NULL) free(tmpbuf2[i]);
 +    if (_tmpbuf2[i] != NULL) free(_tmpbuf2[i]);
 +    if (outbuf[i] != NULL) free(outbuf[i]);
 +  }
 +  if (this->jerr.warning) retval = -1;
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle,
 -      const unsigned char *srcBuf, int width, int pitch, int height,
 -      int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags)
 +DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf,
 +                           int width, int pitch, int height, int pixelFormat,
 +                           unsigned char *dstBuf, int pad, int subsamp,
 +                           int flags)
  {
 -      unsigned char *dstPlanes[3];
 -      int pw0, ph0, strides[3], retval=-1;
 -
 -      if(width<=0 || height<=0 || dstBuf==NULL || pad<0 || !isPow2(pad)
 -              || subsamp<0 || subsamp>=NUMSUBOPT)
 -              _throw("tjEncodeYUV3(): Invalid argument");
 -
 -      pw0=tjPlaneWidth(0, width, subsamp);
 -      ph0=tjPlaneHeight(0, height, subsamp);
 -      dstPlanes[0]=dstBuf;
 -      strides[0]=PAD(pw0, pad);
 -      if(subsamp==TJSAMP_GRAY)
 -      {
 -              strides[1]=strides[2]=0;
 -              dstPlanes[1]=dstPlanes[2]=NULL;
 -      }
 -      else
 -      {
 -              int pw1=tjPlaneWidth(1, width, subsamp);
 -              int ph1=tjPlaneHeight(1, height, subsamp);
 -              strides[1]=strides[2]=PAD(pw1, pad);
 -              dstPlanes[1]=dstPlanes[0]+strides[0]*ph0;
 -              dstPlanes[2]=dstPlanes[1]+strides[1]*ph1;
 -      }
 -
 -      return tjEncodeYUVPlanes(handle, srcBuf, width, pitch, height, pixelFormat,
 -              dstPlanes, strides, subsamp, flags);
 -
 -      bailout:
 -      return retval;
 +  unsigned char *dstPlanes[3];
 +  int pw0, ph0, strides[3], retval = -1;
 +  tjinstance *this = (tjinstance *)handle;
 +
 +  if (!this) _throwg("tjEncodeYUV3(): Invalid handle");
 +  this->isInstanceError = FALSE;
 +
 +  if (width <= 0 || height <= 0 || dstBuf == NULL || pad < 0 || !isPow2(pad) ||
 +      subsamp < 0 || subsamp >= NUMSUBOPT)
 +    _throw("tjEncodeYUV3(): Invalid argument");
 +
 +  pw0 = tjPlaneWidth(0, width, subsamp);
 +  ph0 = tjPlaneHeight(0, height, subsamp);
 +  dstPlanes[0] = dstBuf;
 +  strides[0] = PAD(pw0, pad);
 +  if (subsamp == TJSAMP_GRAY) {
 +    strides[1] = strides[2] = 0;
 +    dstPlanes[1] = dstPlanes[2] = NULL;
 +  } else {
 +    int pw1 = tjPlaneWidth(1, width, subsamp);
 +    int ph1 = tjPlaneHeight(1, height, subsamp);
 +
 +    strides[1] = strides[2] = PAD(pw1, pad);
 +    dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
 +    dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
 +  }
 +
 +  return tjEncodeYUVPlanes(handle, srcBuf, width, pitch, height, pixelFormat,
 +                           dstPlanes, strides, subsamp, flags);
 +
 +bailout:
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf,
 -      int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf,
 -      int subsamp, int flags)
 +DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
 +                           int pitch, int height, int pixelFormat,
 +                           unsigned char *dstBuf, int subsamp, int flags)
  {
 -      return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat,
 -              dstBuf, 4, subsamp, flags);
 +  return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat,
 +                      dstBuf, 4, subsamp, flags);
  }
  
 -DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, unsigned char *srcBuf,
 -      int width, int pitch, int height, int pixelSize, unsigned char *dstBuf,
 -      int subsamp, int flags)
 +DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
 +                          int pitch, int height, int pixelSize,
 +                          unsigned char *dstBuf, int subsamp, int flags)
  {
 -      return tjEncodeYUV2(handle, srcBuf, width, pitch, height,
 -              getPixelFormat(pixelSize, flags), dstBuf, subsamp, flags);
 +  return tjEncodeYUV2(handle, srcBuf, width, pitch, height,
 +                      getPixelFormat(pixelSize, flags), dstBuf, subsamp,
 +                      flags);
  }
  
  
 -DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle,
 -      const unsigned char **srcPlanes, int width, const int *strides, int height,
 -      int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual,
 -      int flags)
 +DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle,
 +                                      const unsigned char **srcPlanes,
 +                                      int width, const int *strides,
 +                                      int height, int subsamp,
 +                                      unsigned char **jpegBuf,
 +                                      unsigned long *jpegSize, int jpegQual,
 +                                      int flags)
  {
 -      int i, row, retval=0, alloc=1;  JSAMPROW *inbuf[MAX_COMPONENTS];
 -      int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
 -              tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
 -      JSAMPLE *_tmpbuf=NULL, *ptr;  JSAMPROW *tmpbuf[MAX_COMPONENTS];
 +  int i, row, retval = 0, alloc = 1;
 +  int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
 +    tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS];
 +  JSAMPLE *_tmpbuf = NULL, *ptr;
 +  JSAMPROW *inbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS];
  
 -      getcinstance(handle)
 +  getcinstance(handle)
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
  
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              tmpbuf[i]=NULL;  inbuf[i]=NULL;
 -      }
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    tmpbuf[i] = NULL;  inbuf[i] = NULL;
 +  }
  
 -      if((this->init&COMPRESS)==0)
 -              _throw("tjCompressFromYUVPlanes(): Instance has not been initialized for compression");
 +  if ((this->init & COMPRESS) == 0)
 +    _throw("tjCompressFromYUVPlanes(): Instance has not been initialized for compression");
  
 -      if(!srcPlanes || !srcPlanes[0] || width<=0 || height<=0 || subsamp<0
 -              || subsamp>=NUMSUBOPT || jpegBuf==NULL || jpegSize==NULL || jpegQual<0
 -              || jpegQual>100)
 -              _throw("tjCompressFromYUVPlanes(): Invalid argument");
 -      if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
 -              _throw("tjCompressFromYUVPlanes(): Invalid argument");
 +  if (!srcPlanes || !srcPlanes[0] || width <= 0 || height <= 0 ||
 +      subsamp < 0 || subsamp >= NUMSUBOPT || jpegBuf == NULL ||
 +      jpegSize == NULL || jpegQual < 0 || jpegQual > 100)
 +    _throw("tjCompressFromYUVPlanes(): Invalid argument");
 +  if (subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
 +    _throw("tjCompressFromYUVPlanes(): Invalid argument");
  
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
  
 -      cinfo->image_width=width;
 -      cinfo->image_height=height;
 +  cinfo->image_width = width;
 +  cinfo->image_height = height;
  
 -      if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 -      else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 -      else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #ifndef NO_PUTENV
 +  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 +  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 +  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #endif
  
 -      if(flags&TJFLAG_NOREALLOC)
 -      {
 -              alloc=0;  *jpegSize=tjBufSize(width, height, subsamp);
 -      }
 -      jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
 -      if(setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags)==-1)
 -              return -1;
 -      cinfo->raw_data_in=TRUE;
 -
 -      jpeg_start_compress(cinfo, TRUE);
 -      for(i=0; i<cinfo->num_components; i++)
 -      {
 -              jpeg_component_info *compptr=&cinfo->comp_info[i];
 -              int ih;
 -              iw[i]=compptr->width_in_blocks*DCTSIZE;
 -              ih=compptr->height_in_blocks*DCTSIZE;
 -              pw[i]=PAD(cinfo->image_width, cinfo->max_h_samp_factor)
 -                      *compptr->h_samp_factor/cinfo->max_h_samp_factor;
 -              ph[i]=PAD(cinfo->image_height, cinfo->max_v_samp_factor)
 -                      *compptr->v_samp_factor/cinfo->max_v_samp_factor;
 -              if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1;
 -              th[i]=compptr->v_samp_factor*DCTSIZE;
 -              tmpbufsize+=iw[i]*th[i];
 -              if((inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]))==NULL)
 -                      _throw("tjCompressFromYUVPlanes(): Memory allocation failure");
 -              ptr=(JSAMPLE *)srcPlanes[i];
 -              for(row=0; row<ph[i]; row++)
 -              {
 -                      inbuf[i][row]=ptr;
 -                      ptr+=(strides && strides[i]!=0)? strides[i]:pw[i];
 -              }
 -      }
 -      if(usetmpbuf)
 -      {
 -              if((_tmpbuf=(JSAMPLE *)malloc(sizeof(JSAMPLE)*tmpbufsize))==NULL)
 -                      _throw("tjCompressFromYUVPlanes(): Memory allocation failure");
 -              ptr=_tmpbuf;
 -              for(i=0; i<cinfo->num_components; i++)
 -              {
 -                      if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL)
 -                              _throw("tjCompressFromYUVPlanes(): Memory allocation failure");
 -                      for(row=0; row<th[i]; row++)
 -                      {
 -                              tmpbuf[i][row]=ptr;
 -                              ptr+=iw[i];
 -                      }
 -              }
 -      }
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      for(row=0; row<(int)cinfo->image_height;
 -              row+=cinfo->max_v_samp_factor*DCTSIZE)
 -      {
 -              JSAMPARRAY yuvptr[MAX_COMPONENTS];
 -              int crow[MAX_COMPONENTS];
 -              for(i=0; i<cinfo->num_components; i++)
 -              {
 -                      jpeg_component_info *compptr=&cinfo->comp_info[i];
 -                      crow[i]=row*compptr->v_samp_factor/cinfo->max_v_samp_factor;
 -                      if(usetmpbuf)
 -                      {
 -                              int j, k;
 -                              for(j=0; j<min(th[i], ph[i]-crow[i]); j++)
 -                              {
 -                                      memcpy(tmpbuf[i][j], inbuf[i][crow[i]+j], pw[i]);
 -                                      /* Duplicate last sample in row to fill out MCU */
 -                                      for(k=pw[i]; k<iw[i]; k++) tmpbuf[i][j][k]=tmpbuf[i][j][pw[i]-1];
 -                              }
 -                              /* Duplicate last row to fill out MCU */
 -                              for(j=ph[i]-crow[i]; j<th[i]; j++)
 -                                      memcpy(tmpbuf[i][j], tmpbuf[i][ph[i]-crow[i]-1], iw[i]);
 -                              yuvptr[i]=tmpbuf[i];
 -                      }
 -                      else
 -                              yuvptr[i]=&inbuf[i][crow[i]];
 -              }
 -              jpeg_write_raw_data(cinfo, yuvptr, cinfo->max_v_samp_factor*DCTSIZE);
 -      }
 -      jpeg_finish_compress(cinfo);
 -
 -      bailout:
 -      if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              if(tmpbuf[i]) free(tmpbuf[i]);
 -              if(inbuf[i]) free(inbuf[i]);
 -      }
 -      if(_tmpbuf) free(_tmpbuf);
 -      if(this->jerr.warning) retval=-1;
 -      return retval;
 +  if (flags & TJFLAG_NOREALLOC) {
 +    alloc = 0;  *jpegSize = tjBufSize(width, height, subsamp);
 +  }
 +  jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
 +  if (setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags) == -1)
 +    return -1;
 +  cinfo->raw_data_in = TRUE;
 +
 +  jpeg_start_compress(cinfo, TRUE);
 +  for (i = 0; i < cinfo->num_components; i++) {
 +    jpeg_component_info *compptr = &cinfo->comp_info[i];
 +    int ih;
 +
 +    iw[i] = compptr->width_in_blocks * DCTSIZE;
 +    ih = compptr->height_in_blocks * DCTSIZE;
 +    pw[i] = PAD(cinfo->image_width, cinfo->max_h_samp_factor) *
 +            compptr->h_samp_factor / cinfo->max_h_samp_factor;
 +    ph[i] = PAD(cinfo->image_height, cinfo->max_v_samp_factor) *
 +            compptr->v_samp_factor / cinfo->max_v_samp_factor;
 +    if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1;
 +    th[i] = compptr->v_samp_factor * DCTSIZE;
 +    tmpbufsize += iw[i] * th[i];
 +    if ((inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL)
 +      _throw("tjCompressFromYUVPlanes(): Memory allocation failure");
 +    ptr = (JSAMPLE *)srcPlanes[i];
 +    for (row = 0; row < ph[i]; row++) {
 +      inbuf[i][row] = ptr;
 +      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
 +    }
 +  }
 +  if (usetmpbuf) {
 +    if ((_tmpbuf = (JSAMPLE *)malloc(sizeof(JSAMPLE) * tmpbufsize)) == NULL)
 +      _throw("tjCompressFromYUVPlanes(): Memory allocation failure");
 +    ptr = _tmpbuf;
 +    for (i = 0; i < cinfo->num_components; i++) {
 +      if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL)
 +        _throw("tjCompressFromYUVPlanes(): Memory allocation failure");
 +      for (row = 0; row < th[i]; row++) {
 +        tmpbuf[i][row] = ptr;
 +        ptr += iw[i];
 +      }
 +    }
 +  }
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  for (row = 0; row < (int)cinfo->image_height;
 +       row += cinfo->max_v_samp_factor * DCTSIZE) {
 +    JSAMPARRAY yuvptr[MAX_COMPONENTS];
 +    int crow[MAX_COMPONENTS];
 +
 +    for (i = 0; i < cinfo->num_components; i++) {
 +      jpeg_component_info *compptr = &cinfo->comp_info[i];
 +
 +      crow[i] = row * compptr->v_samp_factor / cinfo->max_v_samp_factor;
 +      if (usetmpbuf) {
 +        int j, k;
 +
 +        for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) {
 +          memcpy(tmpbuf[i][j], inbuf[i][crow[i] + j], pw[i]);
 +          /* Duplicate last sample in row to fill out MCU */
 +          for (k = pw[i]; k < iw[i]; k++)
 +            tmpbuf[i][j][k] = tmpbuf[i][j][pw[i] - 1];
 +        }
 +        /* Duplicate last row to fill out MCU */
 +        for (j = ph[i] - crow[i]; j < th[i]; j++)
 +          memcpy(tmpbuf[i][j], tmpbuf[i][ph[i] - crow[i] - 1], iw[i]);
 +        yuvptr[i] = tmpbuf[i];
 +      } else
 +        yuvptr[i] = &inbuf[i][crow[i]];
 +    }
 +    jpeg_write_raw_data(cinfo, yuvptr, cinfo->max_v_samp_factor * DCTSIZE);
 +  }
 +  jpeg_finish_compress(cinfo);
 +
 +bailout:
 +  if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo);
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    if (tmpbuf[i]) free(tmpbuf[i]);
 +    if (inbuf[i]) free(inbuf[i]);
 +  }
 +  if (_tmpbuf) free(_tmpbuf);
 +  if (this->jerr.warning) retval = -1;
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle,
 -      const unsigned char *srcBuf, int width, int pad, int height, int subsamp,
 -      unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags)
 +DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf,
 +                                int width, int pad, int height, int subsamp,
 +                                unsigned char **jpegBuf,
 +                                unsigned long *jpegSize, int jpegQual,
 +                                int flags)
  {
 -      const unsigned char *srcPlanes[3];
 -      int pw0, ph0, strides[3], retval=-1;
 -
 -      if(srcBuf==NULL || width<=0 || pad<1 || height<=0 || subsamp<0
 -              || subsamp>=NUMSUBOPT)
 -              _throw("tjCompressFromYUV(): Invalid argument");
 -
 -      pw0=tjPlaneWidth(0, width, subsamp);
 -      ph0=tjPlaneHeight(0, height, subsamp);
 -      srcPlanes[0]=srcBuf;
 -      strides[0]=PAD(pw0, pad);
 -      if(subsamp==TJSAMP_GRAY)
 -      {
 -              strides[1]=strides[2]=0;
 -              srcPlanes[1]=srcPlanes[2]=NULL;
 -      }
 -      else
 -      {
 -              int pw1=tjPlaneWidth(1, width, subsamp);
 -              int ph1=tjPlaneHeight(1, height, subsamp);
 -              strides[1]=strides[2]=PAD(pw1, pad);
 -              srcPlanes[1]=srcPlanes[0]+strides[0]*ph0;
 -              srcPlanes[2]=srcPlanes[1]+strides[1]*ph1;
 -      }
 -
 -      return tjCompressFromYUVPlanes(handle, srcPlanes, width, strides, height,
 -              subsamp, jpegBuf, jpegSize, jpegQual, flags);
 -
 -      bailout:
 -      return retval;
 +  const unsigned char *srcPlanes[3];
 +  int pw0, ph0, strides[3], retval = -1;
 +  tjinstance *this = (tjinstance *)handle;
 +
 +  if (!this) _throwg("tjCompressFromYUV(): Invalid handle");
 +  this->isInstanceError = FALSE;
 +
 +  if (srcBuf == NULL || width <= 0 || pad < 1 || height <= 0 || subsamp < 0 ||
 +      subsamp >= NUMSUBOPT)
 +    _throw("tjCompressFromYUV(): Invalid argument");
 +
 +  pw0 = tjPlaneWidth(0, width, subsamp);
 +  ph0 = tjPlaneHeight(0, height, subsamp);
 +  srcPlanes[0] = srcBuf;
 +  strides[0] = PAD(pw0, pad);
 +  if (subsamp == TJSAMP_GRAY) {
 +    strides[1] = strides[2] = 0;
 +    srcPlanes[1] = srcPlanes[2] = NULL;
 +  } else {
 +    int pw1 = tjPlaneWidth(1, width, subsamp);
 +    int ph1 = tjPlaneHeight(1, height, subsamp);
 +
 +    strides[1] = strides[2] = PAD(pw1, pad);
 +    srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
 +    srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
 +  }
 +
 +  return tjCompressFromYUVPlanes(handle, srcPlanes, width, strides, height,
 +                                 subsamp, jpegBuf, jpegSize, jpegQual, flags);
 +
 +bailout:
 +  return retval;
  }
  
  
@@@ -1193,92 -1379,123 +1199,94 @@@ DLLEXPORT tjscalingfactor *tjGetScaling
  }
  
  
 -DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle,
 -      const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
 -      int width, int pitch, int height, int pixelFormat, int flags)
 +DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf,
 +                            unsigned long jpegSize, unsigned char *dstBuf,
 +                            int width, int pitch, int height, int pixelFormat,
 +                            int flags)
  {
 -      int i, retval=0;  JSAMPROW *row_pointer=NULL;
 -      int jpegwidth, jpegheight, scaledw, scaledh;
 -      #ifndef JCS_EXTENSIONS
 -      unsigned char *rgbBuf=NULL;
 -      unsigned char *_dstBuf=NULL;  int _pitch=0;
 -      #endif
 +  JSAMPROW *row_pointer = NULL;
 +  int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh;
  
 -      getdinstance(handle);
 -      if((this->init&DECOMPRESS)==0)
 -              _throw("tjDecompress2(): Instance has not been initialized for decompression");
 +  getdinstance(handle);
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
 +  if ((this->init & DECOMPRESS) == 0)
 +    _throw("tjDecompress2(): Instance has not been initialized for decompression");
  
 -      if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pitch<0
 -              || height<0 || pixelFormat<0 || pixelFormat>=TJ_NUMPF)
 -              _throw("tjDecompress2(): Invalid argument");
 +  if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 ||
 +      pitch < 0 || height < 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
 +    _throw("tjDecompress2(): Invalid argument");
  
 -      if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 -      else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 -      else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #ifndef NO_PUTENV
 +  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 +  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 +  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #endif
  
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 -      jpeg_read_header(dinfo, TRUE);
 -      if(setDecompDefaults(dinfo, pixelFormat, flags)==-1)
 -      {
 -              retval=-1;  goto bailout;
 -      }
 -
 -      if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE;
 -
 -      jpegwidth=dinfo->image_width;  jpegheight=dinfo->image_height;
 -      if(width==0) width=jpegwidth;
 -      if(height==0) height=jpegheight;
 -      for(i=0; i<NUMSF; i++)
 -      {
 -              scaledw=TJSCALED(jpegwidth, sf[i]);
 -              scaledh=TJSCALED(jpegheight, sf[i]);
 -              if(scaledw<=width && scaledh<=height)
 -                      break;
 -      }
 -      if(i>=NUMSF)
 -              _throw("tjDecompress2(): Could not scale down to desired image dimensions");
 -      width=scaledw;  height=scaledh;
 -      dinfo->scale_num=sf[i].num;
 -      dinfo->scale_denom=sf[i].denom;
 -
 -      jpeg_start_decompress(dinfo);
 -      if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat];
 -
 -      #ifndef JCS_EXTENSIONS
 -      if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK &&
 -              (RGB_RED!=tjRedOffset[pixelFormat] ||
 -                      RGB_GREEN!=tjGreenOffset[pixelFormat] ||
 -                      RGB_BLUE!=tjBlueOffset[pixelFormat] ||
 -                      RGB_PIXELSIZE!=tjPixelSize[pixelFormat]))
 -      {
 -              rgbBuf=(unsigned char *)malloc(width*height*3);
 -              if(!rgbBuf) _throw("tjDecompress2(): Memory allocation failure");
 -              _pitch=pitch;  pitch=width*3;
 -              _dstBuf=dstBuf;  dstBuf=rgbBuf;
 -      }
 -      #endif
 -
 -      if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)
 -              *dinfo->output_height))==NULL)
 -              _throw("tjDecompress2(): Memory allocation failure");
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -      for(i=0; i<(int)dinfo->output_height; i++)
 -      {
 -              if(flags&TJFLAG_BOTTOMUP)
 -                      row_pointer[i]=&dstBuf[(dinfo->output_height-i-1)*pitch];
 -              else row_pointer[i]=&dstBuf[i*pitch];
 -      }
 -      while(dinfo->output_scanline<dinfo->output_height)
 -      {
 -              jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline],
 -                      dinfo->output_height-dinfo->output_scanline);
 -      }
 -      jpeg_finish_decompress(dinfo);
 -
 -      #ifndef JCS_EXTENSIONS
 -      fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat);
 -      #endif
 -
 -      bailout:
 -      if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
 -      #ifndef JCS_EXTENSIONS
 -      if(rgbBuf) free(rgbBuf);
 -      #endif
 -      if(row_pointer) free(row_pointer);
 -      if(this->jerr.warning) retval=-1;
 -      return retval;
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 +  jpeg_read_header(dinfo, TRUE);
 +  this->dinfo.out_color_space = pf2cs[pixelFormat];
 +  if (flags & TJFLAG_FASTDCT) this->dinfo.dct_method = JDCT_FASTEST;
 +  if (flags & TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling = FALSE;
 +
 +  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
 +  if (width == 0) width = jpegwidth;
 +  if (height == 0) height = jpegheight;
 +  for (i = 0; i < NUMSF; i++) {
 +    scaledw = TJSCALED(jpegwidth, sf[i]);
 +    scaledh = TJSCALED(jpegheight, sf[i]);
 +    if (scaledw <= width && scaledh <= height)
 +      break;
 +  }
 +  if (i >= NUMSF)
 +    _throw("tjDecompress2(): Could not scale down to desired image dimensions");
 +  width = scaledw;  height = scaledh;
 +  dinfo->scale_num = sf[i].num;
 +  dinfo->scale_denom = sf[i].denom;
 +
 +  jpeg_start_decompress(dinfo);
 +  if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat];
 +
 +  if ((row_pointer =
 +       (JSAMPROW *)malloc(sizeof(JSAMPROW) * dinfo->output_height)) == NULL)
 +    _throw("tjDecompress2(): Memory allocation failure");
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +  for (i = 0; i < (int)dinfo->output_height; i++) {
 +    if (flags & TJFLAG_BOTTOMUP)
 +      row_pointer[i] = &dstBuf[(dinfo->output_height - i - 1) * pitch];
 +    else
 +      row_pointer[i] = &dstBuf[i * pitch];
 +  }
 +  while (dinfo->output_scanline < dinfo->output_height)
 +    jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline],
 +                        dinfo->output_height - dinfo->output_scanline);
 +  jpeg_finish_decompress(dinfo);
 +
 +bailout:
 +  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
 +  if (row_pointer) free(row_pointer);
 +  if (this->jerr.warning) retval = -1;
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, unsigned char *jpegBuf,
 -      unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch,
 -      int height, int pixelSize, int flags)
 +DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
 +                           unsigned long jpegSize, unsigned char *dstBuf,
 +                           int width, int pitch, int height, int pixelSize,
 +                           int flags)
  {
 -      if(flags&TJ_YUV)
 -              return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags);
 -      else
 -              return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch,
 -                      height, getPixelFormat(pixelSize, flags), flags);
 +  if (flags & TJ_YUV)
 +    return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags);
 +  else
 +    return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch,
 +                         height, getPixelFormat(pixelSize, flags), flags);
  }
  
  
@@@ -1332,428 -1552,465 +1340,432 @@@ void my_reset_marker_reader(j_decompres
  {
  }
  
 -DLLEXPORT int DLLCALL tjDecodeYUVPlanes(tjhandle handle,
 -      const unsigned char **srcPlanes, const int *strides, int subsamp,
 -      unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat,
 -      int flags)
 +DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle,
 +                                const unsigned char **srcPlanes,
 +                                const int *strides, int subsamp,
 +                                unsigned char *dstBuf, int width, int pitch,
 +                                int height, int pixelFormat, int flags)
  {
 -      int i, retval=0;  JSAMPROW *row_pointer=NULL;
 -      JSAMPLE *_tmpbuf[MAX_COMPONENTS];
 -      JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS];
 -      int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
 -      JSAMPLE *ptr;
 -      jpeg_component_info *compptr;
 -      #ifndef JCS_EXTENSIONS
 -      unsigned char *rgbBuf=NULL;
 -      unsigned char *_dstBuf=NULL;  int _pitch=0;
 -      #endif
 -      int (*old_read_markers)(j_decompress_ptr);
 -      void (*old_reset_marker_reader)(j_decompress_ptr);
 -
 -      getdinstance(handle);
 -
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              tmpbuf[i]=NULL;  _tmpbuf[i]=NULL;  inbuf[i]=NULL;
 -      }
 -
 -      if((this->init&DECOMPRESS)==0)
 -              _throw("tjDecodeYUVPlanes(): Instance has not been initialized for decompression");
 -
 -      if(!srcPlanes || !srcPlanes[0] || subsamp<0 || subsamp>=NUMSUBOPT
 -              || dstBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
 -              || pixelFormat>=TJ_NUMPF)
 -              _throw("tjDecodeYUVPlanes(): Invalid argument");
 -      if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
 -              _throw("tjDecodeYUVPlanes(): Invalid argument");
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      if(pixelFormat==TJPF_CMYK)
 -              _throw("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels.");
 -
 -      if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
 -      dinfo->image_width=width;
 -      dinfo->image_height=height;
 +  JSAMPROW *row_pointer = NULL;
 +  JSAMPLE *_tmpbuf[MAX_COMPONENTS];
 +  JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS];
 +  int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
 +  JSAMPLE *ptr;
 +  jpeg_component_info *compptr;
 +  int (*old_read_markers) (j_decompress_ptr);
 +  void (*old_reset_marker_reader) (j_decompress_ptr);
 +
 +  getdinstance(handle);
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
 +
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    tmpbuf[i] = NULL;  _tmpbuf[i] = NULL;  inbuf[i] = NULL;
 +  }
 +
 +  if ((this->init & DECOMPRESS) == 0)
 +    _throw("tjDecodeYUVPlanes(): Instance has not been initialized for decompression");
 +
 +  if (!srcPlanes || !srcPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT ||
 +      dstBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
 +      pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
 +    _throw("tjDecodeYUVPlanes(): Invalid argument");
 +  if (subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
 +    _throw("tjDecodeYUVPlanes(): Invalid argument");
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  if (pixelFormat == TJPF_CMYK)
 +    _throw("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels.");
 +
 +  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
 +  dinfo->image_width = width;
 +  dinfo->image_height = height;
  
 -      if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 -      else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 -      else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #ifndef NO_PUTENV
 +  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 +  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 +  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #endif
  
 -      if(setDecodeDefaults(dinfo, pixelFormat, subsamp, flags)==-1)
 -      {
 -              retval=-1;  goto bailout;
 -      }
 -      old_read_markers=dinfo->marker->read_markers;
 -      dinfo->marker->read_markers=my_read_markers;
 -      old_reset_marker_reader=dinfo->marker->reset_marker_reader;
 -      dinfo->marker->reset_marker_reader=my_reset_marker_reader;
 -      jpeg_read_header(dinfo, TRUE);
 -      dinfo->marker->read_markers=old_read_markers;
 -      dinfo->marker->reset_marker_reader=old_reset_marker_reader;
 -
 -      if(setDecompDefaults(dinfo, pixelFormat, flags)==-1)
 -      {
 -              retval=-1;  goto bailout;
 -      }
 -      dinfo->do_fancy_upsampling=FALSE;
 -      dinfo->Se=DCTSIZE2-1;
 -      jinit_master_decompress(dinfo);
 -      (*dinfo->upsample->start_pass)(dinfo);
 -
 -      pw0=PAD(width, dinfo->max_h_samp_factor);
 -      ph0=PAD(height, dinfo->max_v_samp_factor);
 -
 -      if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat];
 -
 -      #ifndef JCS_EXTENSIONS
 -      if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK &&
 -              (RGB_RED!=tjRedOffset[pixelFormat] ||
 -                      RGB_GREEN!=tjGreenOffset[pixelFormat] ||
 -                      RGB_BLUE!=tjBlueOffset[pixelFormat] ||
 -                      RGB_PIXELSIZE!=tjPixelSize[pixelFormat]))
 -      {
 -              rgbBuf=(unsigned char *)malloc(width*height*3);
 -              if(!rgbBuf) _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 -              _pitch=pitch;  pitch=width*3;
 -              _dstBuf=dstBuf;  dstBuf=rgbBuf;
 -      }
 -      #endif
 -
 -      if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL)
 -              _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 -      for(i=0; i<height; i++)
 -      {
 -              if(flags&TJFLAG_BOTTOMUP) row_pointer[i]=&dstBuf[(height-i-1)*pitch];
 -              else row_pointer[i]=&dstBuf[i*pitch];
 -      }
 -      if(height<ph0)
 -              for(i=height; i<ph0; i++) row_pointer[i]=row_pointer[height-1];
 -
 -      for(i=0; i<dinfo->num_components; i++)
 -      {
 -              compptr=&dinfo->comp_info[i];
 -              _tmpbuf[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16)
 -                      * compptr->v_samp_factor + 16);
 -              if(!_tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 -              tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor);
 -              if(!tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 -              for(row=0; row<compptr->v_samp_factor; row++)
 -              {
 -                      unsigned char *_tmpbuf_aligned=
 -                              (unsigned char *)PAD((size_t)_tmpbuf[i], 16);
 -                      tmpbuf[i][row]=&_tmpbuf_aligned[
 -                              PAD(compptr->width_in_blocks*DCTSIZE, 16) * row];
 -              }
 -              pw[i]=pw0*compptr->h_samp_factor/dinfo->max_h_samp_factor;
 -              ph[i]=ph0*compptr->v_samp_factor/dinfo->max_v_samp_factor;
 -              inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]);
 -              if(!inbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 -              ptr=(JSAMPLE *)srcPlanes[i];
 -              for(row=0; row<ph[i]; row++)
 -              {
 -                      inbuf[i][row]=ptr;
 -                      ptr+=(strides && strides[i]!=0)? strides[i]:pw[i];
 -              }
 -      }
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      for(row=0; row<ph0; row+=dinfo->max_v_samp_factor)
 -      {
 -              JDIMENSION inrow=0, outrow=0;
 -              for(i=0, compptr=dinfo->comp_info; i<dinfo->num_components; i++, compptr++)
 -                      jcopy_sample_rows(inbuf[i],
 -                              row*compptr->v_samp_factor/dinfo->max_v_samp_factor, tmpbuf[i], 0,
 -                              compptr->v_samp_factor, pw[i]);
 -              (dinfo->upsample->upsample)(dinfo, tmpbuf, &inrow,
 -                      dinfo->max_v_samp_factor, &row_pointer[row], &outrow,
 -                      dinfo->max_v_samp_factor);
 -      }
 -      jpeg_abort_decompress(dinfo);
 -
 -      #ifndef JCS_EXTENSIONS
 -      fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat);
 -      #endif
 -
 -      bailout:
 -      if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
 -      #ifndef JCS_EXTENSIONS
 -      if(rgbBuf) free(rgbBuf);
 -      #endif
 -      if(row_pointer) free(row_pointer);
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              if(tmpbuf[i]!=NULL) free(tmpbuf[i]);
 -              if(_tmpbuf[i]!=NULL) free(_tmpbuf[i]);
 -              if(inbuf[i]!=NULL) free(inbuf[i]);
 -      }
 -      if(this->jerr.warning) retval=-1;
 -      return retval;
 +  if (setDecodeDefaults(dinfo, pixelFormat, subsamp, flags) == -1) {
 +    retval = -1;  goto bailout;
 +  }
 +  old_read_markers = dinfo->marker->read_markers;
 +  dinfo->marker->read_markers = my_read_markers;
 +  old_reset_marker_reader = dinfo->marker->reset_marker_reader;
 +  dinfo->marker->reset_marker_reader = my_reset_marker_reader;
 +  jpeg_read_header(dinfo, TRUE);
 +  dinfo->marker->read_markers = old_read_markers;
 +  dinfo->marker->reset_marker_reader = old_reset_marker_reader;
 +
 +  this->dinfo.out_color_space = pf2cs[pixelFormat];
 +  if (flags & TJFLAG_FASTDCT) this->dinfo.dct_method = JDCT_FASTEST;
 +  dinfo->do_fancy_upsampling = FALSE;
 +  dinfo->Se = DCTSIZE2 - 1;
 +  jinit_master_decompress(dinfo);
 +  (*dinfo->upsample->start_pass) (dinfo);
 +
 +  pw0 = PAD(width, dinfo->max_h_samp_factor);
 +  ph0 = PAD(height, dinfo->max_v_samp_factor);
 +
 +  if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat];
 +
 +  if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL)
 +    _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 +  for (i = 0; i < height; i++) {
 +    if (flags & TJFLAG_BOTTOMUP)
 +      row_pointer[i] = &dstBuf[(height - i - 1) * pitch];
 +    else
 +      row_pointer[i] = &dstBuf[i * pitch];
 +  }
 +  if (height < ph0)
 +    for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1];
 +
 +  for (i = 0; i < dinfo->num_components; i++) {
 +    compptr = &dinfo->comp_info[i];
 +    _tmpbuf[i] =
 +      (JSAMPLE *)malloc(PAD(compptr->width_in_blocks * DCTSIZE, 32) *
 +                        compptr->v_samp_factor + 32);
 +    if (!_tmpbuf[i])
 +      _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 +    tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor);
 +    if (!tmpbuf[i])
 +      _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 +    for (row = 0; row < compptr->v_samp_factor; row++) {
 +      unsigned char *_tmpbuf_aligned =
 +        (unsigned char *)PAD((size_t)_tmpbuf[i], 32);
 +
 +      tmpbuf[i][row] =
 +        &_tmpbuf_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
 +    }
 +    pw[i] = pw0 * compptr->h_samp_factor / dinfo->max_h_samp_factor;
 +    ph[i] = ph0 * compptr->v_samp_factor / dinfo->max_v_samp_factor;
 +    inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]);
 +    if (!inbuf[i])
 +      _throw("tjDecodeYUVPlanes(): Memory allocation failure");
 +    ptr = (JSAMPLE *)srcPlanes[i];
 +    for (row = 0; row < ph[i]; row++) {
 +      inbuf[i][row] = ptr;
 +      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
 +    }
 +  }
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  for (row = 0; row < ph0; row += dinfo->max_v_samp_factor) {
 +    JDIMENSION inrow = 0, outrow = 0;
 +
 +    for (i = 0, compptr = dinfo->comp_info; i < dinfo->num_components;
 +         i++, compptr++)
 +      jcopy_sample_rows(inbuf[i],
 +        row * compptr->v_samp_factor / dinfo->max_v_samp_factor, tmpbuf[i], 0,
 +        compptr->v_samp_factor, pw[i]);
 +    (dinfo->upsample->upsample) (dinfo, tmpbuf, &inrow,
 +                                 dinfo->max_v_samp_factor, &row_pointer[row],
 +                                 &outrow, dinfo->max_v_samp_factor);
 +  }
 +  jpeg_abort_decompress(dinfo);
 +
 +bailout:
 +  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
 +  if (row_pointer) free(row_pointer);
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    if (tmpbuf[i] != NULL) free(tmpbuf[i]);
 +    if (_tmpbuf[i] != NULL) free(_tmpbuf[i]);
 +    if (inbuf[i] != NULL) free(inbuf[i]);
 +  }
 +  if (this->jerr.warning) retval = -1;
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
 -      int pad, int subsamp, unsigned char *dstBuf, int width, int pitch,
 -      int height, int pixelFormat, int flags)
 +DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
 +                          int pad, int subsamp, unsigned char *dstBuf,
 +                          int width, int pitch, int height, int pixelFormat,
 +                          int flags)
  {
 -      const unsigned char *srcPlanes[3];
 -      int pw0, ph0, strides[3], retval=-1;
 -
 -      if(srcBuf==NULL || pad<0 || !isPow2(pad) || subsamp<0 || subsamp>=NUMSUBOPT
 -              || width<=0 || height<=0)
 -              _throw("tjDecodeYUV(): Invalid argument");
 -
 -      pw0=tjPlaneWidth(0, width, subsamp);
 -      ph0=tjPlaneHeight(0, height, subsamp);
 -      srcPlanes[0]=srcBuf;
 -      strides[0]=PAD(pw0, pad);
 -      if(subsamp==TJSAMP_GRAY)
 -      {
 -              strides[1]=strides[2]=0;
 -              srcPlanes[1]=srcPlanes[2]=NULL;
 -      }
 -      else
 -      {
 -              int pw1=tjPlaneWidth(1, width, subsamp);
 -              int ph1=tjPlaneHeight(1, height, subsamp);
 -              strides[1]=strides[2]=PAD(pw1, pad);
 -              srcPlanes[1]=srcPlanes[0]+strides[0]*ph0;
 -              srcPlanes[2]=srcPlanes[1]+strides[1]*ph1;
 -      }
 -
 -      return tjDecodeYUVPlanes(handle, srcPlanes, strides, subsamp, dstBuf, width,
 -              pitch, height, pixelFormat, flags);
 -
 -      bailout:
 -      return retval;
 +  const unsigned char *srcPlanes[3];
 +  int pw0, ph0, strides[3], retval = -1;
 +  tjinstance *this = (tjinstance *)handle;
 +
 +  if (!this) _throwg("tjDecodeYUV(): Invalid handle");
 +  this->isInstanceError = FALSE;
 +
 +  if (srcBuf == NULL || pad < 0 || !isPow2(pad) || subsamp < 0 ||
 +      subsamp >= NUMSUBOPT || width <= 0 || height <= 0)
 +    _throw("tjDecodeYUV(): Invalid argument");
 +
 +  pw0 = tjPlaneWidth(0, width, subsamp);
 +  ph0 = tjPlaneHeight(0, height, subsamp);
 +  srcPlanes[0] = srcBuf;
 +  strides[0] = PAD(pw0, pad);
 +  if (subsamp == TJSAMP_GRAY) {
 +    strides[1] = strides[2] = 0;
 +    srcPlanes[1] = srcPlanes[2] = NULL;
 +  } else {
 +    int pw1 = tjPlaneWidth(1, width, subsamp);
 +    int ph1 = tjPlaneHeight(1, height, subsamp);
 +
 +    strides[1] = strides[2] = PAD(pw1, pad);
 +    srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
 +    srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
 +  }
 +
 +  return tjDecodeYUVPlanes(handle, srcPlanes, strides, subsamp, dstBuf, width,
 +                           pitch, height, pixelFormat, flags);
 +
 +bailout:
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle,
 -      const unsigned char *jpegBuf, unsigned long jpegSize,
 -      unsigned char **dstPlanes, int width, int *strides, int height, int flags)
 +DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle,
 +                                      const unsigned char *jpegBuf,
 +                                      unsigned long jpegSize,
 +                                      unsigned char **dstPlanes, int width,
 +                                      int *strides, int height, int flags)
  {
 -      int i, sfi, row, retval=0;  JSAMPROW *outbuf[MAX_COMPONENTS];
 -      int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh;
 -      int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
 -              tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
 -      JSAMPLE *_tmpbuf=NULL, *ptr;  JSAMPROW *tmpbuf[MAX_COMPONENTS];
 -      int dctsize;
 +  int i, sfi, row, retval = 0;
 +  int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh;
 +  int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
 +    tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS];
 +  JSAMPLE *_tmpbuf = NULL, *ptr;
 +  JSAMPROW *outbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS];
 +  int dctsize;
  
 -      getdinstance(handle);
 +  getdinstance(handle);
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
  
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              tmpbuf[i]=NULL;  outbuf[i]=NULL;
 -      }
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    tmpbuf[i] = NULL;  outbuf[i] = NULL;
 +  }
  
 -      if((this->init&DECOMPRESS)==0)
 -              _throw("tjDecompressToYUVPlanes(): Instance has not been initialized for decompression");
 +  if ((this->init & DECOMPRESS) == 0)
 +    _throw("tjDecompressToYUVPlanes(): Instance has not been initialized for decompression");
  
 -      if(jpegBuf==NULL || jpegSize<=0 || !dstPlanes || !dstPlanes[0] || width<0
 -              || height<0)
 -              _throw("tjDecompressToYUVPlanes(): Invalid argument");
 +  if (jpegBuf == NULL || jpegSize <= 0 || !dstPlanes || !dstPlanes[0] ||
 +      width < 0 || height < 0)
 +    _throw("tjDecompressToYUVPlanes(): Invalid argument");
  
 -      if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 -      else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 -      else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #ifndef NO_PUTENV
 +  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 +  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 +  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #endif
  
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      if(!this->headerRead)
 -      {
 -              jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 -              jpeg_read_header(dinfo, TRUE);
 -      }
 -      this->headerRead=0;
 -      jpegSubsamp=getSubsamp(dinfo);
 -      if(jpegSubsamp<0)
 -              _throw("tjDecompressToYUVPlanes(): Could not determine subsampling type for JPEG image");
 -
 -      if(jpegSubsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
 -              _throw("tjDecompressToYUVPlanes(): Invalid argument");
 -
 -      jpegwidth=dinfo->image_width;  jpegheight=dinfo->image_height;
 -      if(width==0) width=jpegwidth;
 -      if(height==0) height=jpegheight;
 -      for(i=0; i<NUMSF; i++)
 -      {
 -              scaledw=TJSCALED(jpegwidth, sf[i]);
 -              scaledh=TJSCALED(jpegheight, sf[i]);
 -              if(scaledw<=width && scaledh<=height)
 -                      break;
 -      }
 -      if(i>=NUMSF)
 -              _throw("tjDecompressToYUVPlanes(): Could not scale down to desired image dimensions");
 -      if(dinfo->num_components>3)
 -              _throw("tjDecompressToYUVPlanes(): JPEG image must have 3 or fewer components");
 -
 -      width=scaledw;  height=scaledh;
 -      dinfo->scale_num=sf[i].num;
 -      dinfo->scale_denom=sf[i].denom;
 -      sfi=i;
 -      jpeg_calc_output_dimensions(dinfo);
 -
 -      dctsize=DCTSIZE*sf[sfi].num/sf[sfi].denom;
 -
 -      for(i=0; i<dinfo->num_components; i++)
 -      {
 -              jpeg_component_info *compptr=&dinfo->comp_info[i];
 -              int ih;
 -              iw[i]=compptr->width_in_blocks*dctsize;
 -              ih=compptr->height_in_blocks*dctsize;
 -              pw[i]=PAD(dinfo->output_width, dinfo->max_h_samp_factor)
 -                      *compptr->h_samp_factor/dinfo->max_h_samp_factor;
 -              ph[i]=PAD(dinfo->output_height, dinfo->max_v_samp_factor)
 -                      *compptr->v_samp_factor/dinfo->max_v_samp_factor;
 -              if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1;
 -              th[i]=compptr->v_samp_factor*dctsize;
 -              tmpbufsize+=iw[i]*th[i];
 -              if((outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]))==NULL)
 -                      _throw("tjDecompressToYUVPlanes(): Memory allocation failure");
 -              ptr=dstPlanes[i];
 -              for(row=0; row<ph[i]; row++)
 -              {
 -                      outbuf[i][row]=ptr;
 -                      ptr+=(strides && strides[i]!=0)? strides[i]:pw[i];
 -              }
 -      }
 -      if(usetmpbuf)
 -      {
 -              if((_tmpbuf=(JSAMPLE *)malloc(sizeof(JSAMPLE)*tmpbufsize))==NULL)
 -                      _throw("tjDecompressToYUVPlanes(): Memory allocation failure");
 -              ptr=_tmpbuf;
 -              for(i=0; i<dinfo->num_components; i++)
 -              {
 -                      if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL)
 -                              _throw("tjDecompressToYUVPlanes(): Memory allocation failure");
 -                      for(row=0; row<th[i]; row++)
 -                      {
 -                              tmpbuf[i][row]=ptr;
 -                              ptr+=iw[i];
 -                      }
 -              }
 -      }
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE;
 -      if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST;
 -      dinfo->raw_data_out=TRUE;
 -
 -      jpeg_start_decompress(dinfo);
 -      for(row=0; row<(int)dinfo->output_height;
 -              row+=dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size)
 -      {
 -              JSAMPARRAY yuvptr[MAX_COMPONENTS];
 -              int crow[MAX_COMPONENTS];
 -              for(i=0; i<dinfo->num_components; i++)
 -              {
 -                      jpeg_component_info *compptr=&dinfo->comp_info[i];
 -                      if(jpegSubsamp==TJ_420)
 -                      {
 -                              /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try
 -                                 to be clever and use the IDCT to perform upsampling on the U and V
 -                                 planes.  For instance, if the output image is to be scaled by 1/2
 -                                 relative to the JPEG image, then the scaling factor and upsampling
 -                                 effectively cancel each other, so a normal 8x8 IDCT can be used.
 -                                 However, this is not desirable when using the decompress-to-YUV
 -                                 functionality in TurboJPEG, since we want to output the U and V
 -                                 planes in their subsampled form.  Thus, we have to override some
 -                                 internal libjpeg parameters to force it to use the "scaled" IDCT
 -                                 functions on the U and V planes. */
 -                              compptr->_DCT_scaled_size=dctsize;
 -                              compptr->MCU_sample_width=tjMCUWidth[jpegSubsamp]*
 -                                      sf[sfi].num/sf[sfi].denom*
 -                                      compptr->v_samp_factor/dinfo->max_v_samp_factor;
 -                              dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0];
 -                      }
 -                      crow[i]=row*compptr->v_samp_factor/dinfo->max_v_samp_factor;
 -                      if(usetmpbuf) yuvptr[i]=tmpbuf[i];
 -                      else yuvptr[i]=&outbuf[i][crow[i]];
 -              }
 -              jpeg_read_raw_data(dinfo, yuvptr,
 -                      dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size);
 -              if(usetmpbuf)
 -              {
 -                      int j;
 -                      for(i=0; i<dinfo->num_components; i++)
 -                      {
 -                              for(j=0; j<min(th[i], ph[i]-crow[i]); j++)
 -                              {
 -                                      memcpy(outbuf[i][crow[i]+j], tmpbuf[i][j], pw[i]);
 -                              }
 -                      }
 -              }
 -      }
 -      jpeg_finish_decompress(dinfo);
 -
 -      bailout:
 -      if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
 -      for(i=0; i<MAX_COMPONENTS; i++)
 -      {
 -              if(tmpbuf[i]) free(tmpbuf[i]);
 -              if(outbuf[i]) free(outbuf[i]);
 -      }
 -      if(_tmpbuf) free(_tmpbuf);
 -      if(this->jerr.warning) retval=-1;
 -      return retval;
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  if (!this->headerRead) {
 +    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 +    jpeg_read_header(dinfo, TRUE);
 +  }
 +  this->headerRead = 0;
 +  jpegSubsamp = getSubsamp(dinfo);
 +  if (jpegSubsamp < 0)
 +    _throw("tjDecompressToYUVPlanes(): Could not determine subsampling type for JPEG image");
 +
 +  if (jpegSubsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
 +    _throw("tjDecompressToYUVPlanes(): Invalid argument");
 +
 +  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
 +  if (width == 0) width = jpegwidth;
 +  if (height == 0) height = jpegheight;
 +  for (i = 0; i < NUMSF; i++) {
 +    scaledw = TJSCALED(jpegwidth, sf[i]);
 +    scaledh = TJSCALED(jpegheight, sf[i]);
 +    if (scaledw <= width && scaledh <= height)
 +      break;
 +  }
 +  if (i >= NUMSF)
 +    _throw("tjDecompressToYUVPlanes(): Could not scale down to desired image dimensions");
 +  if (dinfo->num_components > 3)
 +    _throw("tjDecompressToYUVPlanes(): JPEG image must have 3 or fewer components");
 +
 +  width = scaledw;  height = scaledh;
 +  dinfo->scale_num = sf[i].num;
 +  dinfo->scale_denom = sf[i].denom;
 +  sfi = i;
 +  jpeg_calc_output_dimensions(dinfo);
 +
 +  dctsize = DCTSIZE * sf[sfi].num / sf[sfi].denom;
 +
 +  for (i = 0; i < dinfo->num_components; i++) {
 +    jpeg_component_info *compptr = &dinfo->comp_info[i];
 +    int ih;
 +
 +    iw[i] = compptr->width_in_blocks * dctsize;
 +    ih = compptr->height_in_blocks * dctsize;
 +    pw[i] = PAD(dinfo->output_width, dinfo->max_h_samp_factor) *
 +            compptr->h_samp_factor / dinfo->max_h_samp_factor;
 +    ph[i] = PAD(dinfo->output_height, dinfo->max_v_samp_factor) *
 +            compptr->v_samp_factor / dinfo->max_v_samp_factor;
 +    if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1;
 +    th[i] = compptr->v_samp_factor * dctsize;
 +    tmpbufsize += iw[i] * th[i];
 +    if ((outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL)
 +      _throw("tjDecompressToYUVPlanes(): Memory allocation failure");
 +    ptr = dstPlanes[i];
 +    for (row = 0; row < ph[i]; row++) {
 +      outbuf[i][row] = ptr;
 +      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
 +    }
 +  }
 +  if (usetmpbuf) {
 +    if ((_tmpbuf = (JSAMPLE *)malloc(sizeof(JSAMPLE) * tmpbufsize)) == NULL)
 +      _throw("tjDecompressToYUVPlanes(): Memory allocation failure");
 +    ptr = _tmpbuf;
 +    for (i = 0; i < dinfo->num_components; i++) {
 +      if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL)
 +        _throw("tjDecompressToYUVPlanes(): Memory allocation failure");
 +      for (row = 0; row < th[i]; row++) {
 +        tmpbuf[i][row] = ptr;
 +        ptr += iw[i];
 +      }
 +    }
 +  }
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  if (flags & TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling = FALSE;
 +  if (flags & TJFLAG_FASTDCT) dinfo->dct_method = JDCT_FASTEST;
 +  dinfo->raw_data_out = TRUE;
 +
 +  jpeg_start_decompress(dinfo);
 +  for (row = 0; row < (int)dinfo->output_height;
 +       row += dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size) {
 +    JSAMPARRAY yuvptr[MAX_COMPONENTS];
 +    int crow[MAX_COMPONENTS];
 +
 +    for (i = 0; i < dinfo->num_components; i++) {
 +      jpeg_component_info *compptr = &dinfo->comp_info[i];
 +
 +      if (jpegSubsamp == TJ_420) {
 +        /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try
 +           to be clever and use the IDCT to perform upsampling on the U and V
 +           planes.  For instance, if the output image is to be scaled by 1/2
 +           relative to the JPEG image, then the scaling factor and upsampling
 +           effectively cancel each other, so a normal 8x8 IDCT can be used.
 +           However, this is not desirable when using the decompress-to-YUV
 +           functionality in TurboJPEG, since we want to output the U and V
 +           planes in their subsampled form.  Thus, we have to override some
 +           internal libjpeg parameters to force it to use the "scaled" IDCT
 +           functions on the U and V planes. */
 +        compptr->_DCT_scaled_size = dctsize;
 +        compptr->MCU_sample_width = tjMCUWidth[jpegSubsamp] *
 +          sf[sfi].num / sf[sfi].denom *
 +          compptr->v_samp_factor / dinfo->max_v_samp_factor;
 +        dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0];
 +      }
 +      crow[i] = row * compptr->v_samp_factor / dinfo->max_v_samp_factor;
 +      if (usetmpbuf) yuvptr[i] = tmpbuf[i];
 +      else yuvptr[i] = &outbuf[i][crow[i]];
 +    }
 +    jpeg_read_raw_data(dinfo, yuvptr,
 +                       dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size);
 +    if (usetmpbuf) {
 +      int j;
 +
 +      for (i = 0; i < dinfo->num_components; i++) {
 +        for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) {
 +          memcpy(outbuf[i][crow[i] + j], tmpbuf[i][j], pw[i]);
 +        }
 +      }
 +    }
 +  }
 +  jpeg_finish_decompress(dinfo);
 +
 +bailout:
 +  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
 +  for (i = 0; i < MAX_COMPONENTS; i++) {
 +    if (tmpbuf[i]) free(tmpbuf[i]);
 +    if (outbuf[i]) free(outbuf[i]);
 +  }
 +  if (_tmpbuf) free(_tmpbuf);
 +  if (this->jerr.warning) retval = -1;
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle,
 -      const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
 -      int width, int pad, int height, int flags)
 +DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf,
 +                                 unsigned long jpegSize, unsigned char *dstBuf,
 +                                 int width, int pad, int height, int flags)
  {
 -      unsigned char *dstPlanes[3];
 -      int pw0, ph0, strides[3], retval=-1, jpegSubsamp=-1;
 -      int i, jpegwidth, jpegheight, scaledw, scaledh;
 -
 -      getdinstance(handle);
 -
 -      if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pad<1
 -              || !isPow2(pad) || height<0)
 -              _throw("tjDecompressToYUV2(): Invalid argument");
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              return -1;
 -      }
 -
 -      jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 -      jpeg_read_header(dinfo, TRUE);
 -      jpegSubsamp=getSubsamp(dinfo);
 -      if(jpegSubsamp<0)
 -              _throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image");
 -
 -      jpegwidth=dinfo->image_width;  jpegheight=dinfo->image_height;
 -      if(width==0) width=jpegwidth;
 -      if(height==0) height=jpegheight;
 -
 -      for(i=0; i<NUMSF; i++)
 -      {
 -              scaledw=TJSCALED(jpegwidth, sf[i]);
 -              scaledh=TJSCALED(jpegheight, sf[i]);
 -              if(scaledw<=width && scaledh<=height)
 -                      break;
 -      }
 -      if(i>=NUMSF)
 -              _throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions");
 -
 -      pw0=tjPlaneWidth(0, width, jpegSubsamp);
 -      ph0=tjPlaneHeight(0, height, jpegSubsamp);
 -      dstPlanes[0]=dstBuf;
 -      strides[0]=PAD(pw0, pad);
 -      if(jpegSubsamp==TJSAMP_GRAY)
 -      {
 -              strides[1]=strides[2]=0;
 -              dstPlanes[1]=dstPlanes[2]=NULL;
 -      }
 -      else
 -      {
 -              int pw1=tjPlaneWidth(1, width, jpegSubsamp);
 -              int ph1=tjPlaneHeight(1, height, jpegSubsamp);
 -              strides[1]=strides[2]=PAD(pw1, pad);
 -              dstPlanes[1]=dstPlanes[0]+strides[0]*ph0;
 -              dstPlanes[2]=dstPlanes[1]+strides[1]*ph1;
 -      }
 -
 -      this->headerRead=1;
 -      return tjDecompressToYUVPlanes(handle, jpegBuf, jpegSize, dstPlanes, width,
 -              strides, height, flags);
 -
 -      bailout:
 -      return retval;
 -
 +  unsigned char *dstPlanes[3];
 +  int pw0, ph0, strides[3], retval = -1, jpegSubsamp = -1;
 +  int i, jpegwidth, jpegheight, scaledw, scaledh;
 +
 +  getdinstance(handle);
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
 +
 +  if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 ||
 +      pad < 1 || !isPow2(pad) || height < 0)
 +    _throw("tjDecompressToYUV2(): Invalid argument");
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    return -1;
 +  }
 +
 +  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 +  jpeg_read_header(dinfo, TRUE);
 +  jpegSubsamp = getSubsamp(dinfo);
 +  if (jpegSubsamp < 0)
 +    _throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image");
 +
 +  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
 +  if (width == 0) width = jpegwidth;
 +  if (height == 0) height = jpegheight;
 +
 +  for (i = 0; i < NUMSF; i++) {
 +    scaledw = TJSCALED(jpegwidth, sf[i]);
 +    scaledh = TJSCALED(jpegheight, sf[i]);
 +    if (scaledw <= width && scaledh <= height)
 +      break;
 +  }
 +  if (i >= NUMSF)
 +    _throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions");
 +
 +  pw0 = tjPlaneWidth(0, width, jpegSubsamp);
 +  ph0 = tjPlaneHeight(0, height, jpegSubsamp);
 +  dstPlanes[0] = dstBuf;
 +  strides[0] = PAD(pw0, pad);
 +  if (jpegSubsamp == TJSAMP_GRAY) {
 +    strides[1] = strides[2] = 0;
 +    dstPlanes[1] = dstPlanes[2] = NULL;
 +  } else {
 +    int pw1 = tjPlaneWidth(1, width, jpegSubsamp);
 +    int ph1 = tjPlaneHeight(1, height, jpegSubsamp);
 +
 +    strides[1] = strides[2] = PAD(pw1, pad);
 +    dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
 +    dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
 +  }
 +
 +  this->headerRead = 1;
 +  return tjDecompressToYUVPlanes(handle, jpegBuf, jpegSize, dstPlanes, width,
 +                                 strides, height, flags);
 +
 +bailout:
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
  }
  
 -DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
 -      unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
 -      int flags)
 +DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf,
 +                                unsigned long jpegSize, unsigned char *dstBuf,
 +                                int flags)
  {
 -      return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags);
 +  return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags);
  }
  
  
@@@ -1778,314 -2033,157 +1790,316 @@@ DLLEXPORT tjhandle tjInitTransform(void
  }
  
  
 -DLLEXPORT int DLLCALL tjTransform(tjhandle handle,
 -      const unsigned char *jpegBuf, unsigned long jpegSize, int n,
 -      unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *t, int flags)
 +DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf,
 +                          unsigned long jpegSize, int n,
 +                          unsigned char **dstBufs, unsigned long *dstSizes,
 +                          tjtransform *t, int flags)
  {
 -      jpeg_transform_info *xinfo=NULL;
 -      jvirt_barray_ptr *srccoefs, *dstcoefs;
 -      int retval=0, i, jpegSubsamp;
 +  jpeg_transform_info *xinfo = NULL;
 +  jvirt_barray_ptr *srccoefs, *dstcoefs;
 +  int retval = 0, i, jpegSubsamp, saveMarkers = 0;
  
 -      getinstance(handle);
 -      if((this->init&COMPRESS)==0 || (this->init&DECOMPRESS)==0)
 -              _throw("tjTransform(): Instance has not been initialized for transformation");
 +  getinstance(handle);
 +  this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
 +  if ((this->init & COMPRESS) == 0 || (this->init & DECOMPRESS) == 0)
 +    _throw("tjTransform(): Instance has not been initialized for transformation");
  
 -      if(jpegBuf==NULL || jpegSize<=0 || n<1 || dstBufs==NULL || dstSizes==NULL
 -              || t==NULL || flags<0)
 -              _throw("tjTransform(): Invalid argument");
 +  if (jpegBuf == NULL || jpegSize <= 0 || n < 1 || dstBufs == NULL ||
 +      dstSizes == NULL || t == NULL || flags < 0)
 +    _throw("tjTransform(): Invalid argument");
  
 -      if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 -      else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 -      else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #ifndef NO_PUTENV
 +  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
 +  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
 +  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+ #endif
  
 -      if((xinfo=(jpeg_transform_info *)malloc(sizeof(jpeg_transform_info)*n))
 -              ==NULL)
 -              _throw("tjTransform(): Memory allocation failure");
 -      MEMZERO(xinfo, sizeof(jpeg_transform_info)*n);
 -
 -      if(setjmp(this->jerr.setjmp_buffer))
 -      {
 -              /* If we get here, the JPEG code has signaled an error. */
 -              retval=-1;  goto bailout;
 -      }
 -
 -      jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 -
 -      for(i=0; i<n; i++)
 -      {
 -              xinfo[i].transform=xformtypes[t[i].op];
 -              xinfo[i].perfect=(t[i].options&TJXOPT_PERFECT)? 1:0;
 -              xinfo[i].trim=(t[i].options&TJXOPT_TRIM)? 1:0;
 -              xinfo[i].force_grayscale=(t[i].options&TJXOPT_GRAY)? 1:0;
 -              xinfo[i].crop=(t[i].options&TJXOPT_CROP)? 1:0;
 -              if(n!=1 && t[i].op==TJXOP_HFLIP) xinfo[i].slow_hflip=1;
 -              else xinfo[i].slow_hflip=0;
 -
 -              if(xinfo[i].crop)
 -              {
 -                      xinfo[i].crop_xoffset=t[i].r.x;  xinfo[i].crop_xoffset_set=JCROP_POS;
 -                      xinfo[i].crop_yoffset=t[i].r.y;  xinfo[i].crop_yoffset_set=JCROP_POS;
 -                      if(t[i].r.w!=0)
 -                      {
 -                              xinfo[i].crop_width=t[i].r.w;  xinfo[i].crop_width_set=JCROP_POS;
 -                      }
 -                      else xinfo[i].crop_width=JCROP_UNSET;
 -                      if(t[i].r.h!=0)
 -                      {
 -                              xinfo[i].crop_height=t[i].r.h;  xinfo[i].crop_height_set=JCROP_POS;
 -                      }
 -                      else xinfo[i].crop_height=JCROP_UNSET;
 -              }
 -      }
 -
 -      jcopy_markers_setup(dinfo, JCOPYOPT_ALL);
 -      jpeg_read_header(dinfo, TRUE);
 -      jpegSubsamp=getSubsamp(dinfo);
 -      if(jpegSubsamp<0)
 -              _throw("tjTransform(): Could not determine subsampling type for JPEG image");
 -
 -      for(i=0; i<n; i++)
 -      {
 -              if(!jtransform_request_workspace(dinfo, &xinfo[i]))
 -                      _throw("tjTransform(): Transform is not perfect");
 -
 -              if(xinfo[i].crop)
 -              {
 -                      if((t[i].r.x%xinfo[i].iMCU_sample_width)!=0
 -                              || (t[i].r.y%xinfo[i].iMCU_sample_height)!=0)
 -                      {
 -                              snprintf(errStr, JMSG_LENGTH_MAX,
 -                                      "To crop this JPEG image, x must be a multiple of %d\n"
 -                                      "and y must be a multiple of %d.\n",
 -                                      xinfo[i].iMCU_sample_width, xinfo[i].iMCU_sample_height);
 -                              retval=-1;  goto bailout;
 -                      }
 -              }
 -      }
 -
 -      srccoefs=jpeg_read_coefficients(dinfo);
 -
 -      for(i=0; i<n; i++)
 -      {
 -              int w, h, alloc=1;
 -              if(!xinfo[i].crop)
 -              {
 -                      w=dinfo->image_width;  h=dinfo->image_height;
 -              }
 -              else
 -              {
 -                      w=xinfo[i].crop_width;  h=xinfo[i].crop_height;
 -              }
 -              if(flags&TJFLAG_NOREALLOC)
 -              {
 -                      alloc=0;  dstSizes[i]=tjBufSize(w, h, jpegSubsamp);
 -              }
 -              if(!(t[i].options&TJXOPT_NOOUTPUT))
 -                      jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc);
 -              jpeg_copy_critical_parameters(dinfo, cinfo);
 -              dstcoefs=jtransform_adjust_parameters(dinfo, cinfo, srccoefs,
 -                      &xinfo[i]);
 -              if(!(t[i].options&TJXOPT_NOOUTPUT))
 -              {
 -                      jpeg_write_coefficients(cinfo, dstcoefs);
 -                      jcopy_markers_execute(dinfo, cinfo, JCOPYOPT_ALL);
 -              }
 -              else jinit_c_master_control(cinfo, TRUE);
 -              jtransform_execute_transformation(dinfo, cinfo, srccoefs,
 -                      &xinfo[i]);
 -              if(t[i].customFilter)
 -              {
 -                      int ci, y;  JDIMENSION by;
 -                      for(ci=0; ci<cinfo->num_components; ci++)
 -                      {
 -                              jpeg_component_info *compptr=&cinfo->comp_info[ci];
 -                              tjregion arrayRegion={0, 0, compptr->width_in_blocks*DCTSIZE,
 -                                      DCTSIZE};
 -                              tjregion planeRegion={0, 0, compptr->width_in_blocks*DCTSIZE,
 -                                      compptr->height_in_blocks*DCTSIZE};
 -                              for(by=0; by<compptr->height_in_blocks; by+=compptr->v_samp_factor)
 -                              {
 -                                      JBLOCKARRAY barray=(dinfo->mem->access_virt_barray)
 -                                              ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor,
 -                                              TRUE);
 -                                      for(y=0; y<compptr->v_samp_factor; y++)
 -                                      {
 -                                              if(t[i].customFilter(barray[y][0], arrayRegion, planeRegion,
 -                                                      ci, i, &t[i])==-1)
 -                                                      _throw("tjTransform(): Error in custom filter");
 -                                              arrayRegion.y+=DCTSIZE;
 -                                      }
 -                              }
 -                      }
 -              }
 -              if(!(t[i].options&TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo);
 -      }
 -
 -      jpeg_finish_decompress(dinfo);
 -
 -      bailout:
 -      if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
 -      if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
 -      if(xinfo) free(xinfo);
 -      if(this->jerr.warning) retval=-1;
 -      return retval;
 +  if ((xinfo =
 +       (jpeg_transform_info *)malloc(sizeof(jpeg_transform_info) * n)) == NULL)
 +    _throw("tjTransform(): Memory allocation failure");
 +  MEMZERO(xinfo, sizeof(jpeg_transform_info) * n);
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
 +
 +  for (i = 0; i < n; i++) {
 +    xinfo[i].transform = xformtypes[t[i].op];
 +    xinfo[i].perfect = (t[i].options & TJXOPT_PERFECT) ? 1 : 0;
 +    xinfo[i].trim = (t[i].options & TJXOPT_TRIM) ? 1 : 0;
 +    xinfo[i].force_grayscale = (t[i].options & TJXOPT_GRAY) ? 1 : 0;
 +    xinfo[i].crop = (t[i].options & TJXOPT_CROP) ? 1 : 0;
 +    if (n != 1 && t[i].op == TJXOP_HFLIP) xinfo[i].slow_hflip = 1;
 +    else xinfo[i].slow_hflip = 0;
 +
 +    if (xinfo[i].crop) {
 +      xinfo[i].crop_xoffset = t[i].r.x;  xinfo[i].crop_xoffset_set = JCROP_POS;
 +      xinfo[i].crop_yoffset = t[i].r.y;  xinfo[i].crop_yoffset_set = JCROP_POS;
 +      if (t[i].r.w != 0) {
 +        xinfo[i].crop_width = t[i].r.w;  xinfo[i].crop_width_set = JCROP_POS;
 +      } else
 +        xinfo[i].crop_width = JCROP_UNSET;
 +      if (t[i].r.h != 0) {
 +        xinfo[i].crop_height = t[i].r.h;  xinfo[i].crop_height_set = JCROP_POS;
 +      } else
 +        xinfo[i].crop_height = JCROP_UNSET;
 +    }
 +    if (!(t[i].options & TJXOPT_COPYNONE)) saveMarkers = 1;
 +  }
 +
 +  jcopy_markers_setup(dinfo, saveMarkers ? JCOPYOPT_ALL : JCOPYOPT_NONE);
 +  jpeg_read_header(dinfo, TRUE);
 +  jpegSubsamp = getSubsamp(dinfo);
 +  if (jpegSubsamp < 0)
 +    _throw("tjTransform(): Could not determine subsampling type for JPEG image");
 +
 +  for (i = 0; i < n; i++) {
 +    if (!jtransform_request_workspace(dinfo, &xinfo[i]))
 +      _throw("tjTransform(): Transform is not perfect");
 +
 +    if (xinfo[i].crop) {
 +      if ((t[i].r.x % xinfo[i].iMCU_sample_width) != 0 ||
 +          (t[i].r.y % xinfo[i].iMCU_sample_height) != 0) {
 +        snprintf(errStr, JMSG_LENGTH_MAX,
 +                 "To crop this JPEG image, x must be a multiple of %d\n"
 +                 "and y must be a multiple of %d.\n",
 +                 xinfo[i].iMCU_sample_width, xinfo[i].iMCU_sample_height);
 +        retval = -1;  goto bailout;
 +      }
 +    }
 +  }
 +
 +  srccoefs = jpeg_read_coefficients(dinfo);
 +
 +  for (i = 0; i < n; i++) {
 +    int w, h, alloc = 1;
 +
 +    if (!xinfo[i].crop) {
 +      w = dinfo->image_width;  h = dinfo->image_height;
 +    } else {
 +      w = xinfo[i].crop_width;  h = xinfo[i].crop_height;
 +    }
 +    if (flags & TJFLAG_NOREALLOC) {
 +      alloc = 0;  dstSizes[i] = tjBufSize(w, h, jpegSubsamp);
 +    }
 +    if (!(t[i].options & TJXOPT_NOOUTPUT))
 +      jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc);
 +    jpeg_copy_critical_parameters(dinfo, cinfo);
 +    dstcoefs = jtransform_adjust_parameters(dinfo, cinfo, srccoefs, &xinfo[i]);
 +    if (flags & TJFLAG_PROGRESSIVE || t[i].options & TJXOPT_PROGRESSIVE)
 +      jpeg_simple_progression(cinfo);
 +    if (!(t[i].options & TJXOPT_NOOUTPUT)) {
 +      jpeg_write_coefficients(cinfo, dstcoefs);
 +      jcopy_markers_execute(dinfo, cinfo, t[i].options & TJXOPT_COPYNONE ?
 +                                          JCOPYOPT_NONE : JCOPYOPT_ALL);
 +    } else
 +      jinit_c_master_control(cinfo, TRUE);
 +    jtransform_execute_transformation(dinfo, cinfo, srccoefs, &xinfo[i]);
 +    if (t[i].customFilter) {
 +      int ci, y;
 +      JDIMENSION by;
 +
 +      for (ci = 0; ci < cinfo->num_components; ci++) {
 +        jpeg_component_info *compptr = &cinfo->comp_info[ci];
 +        tjregion arrayRegion = {
 +          0, 0, compptr->width_in_blocks * DCTSIZE, DCTSIZE
 +        };
 +        tjregion planeRegion = {
 +          0, 0, compptr->width_in_blocks * DCTSIZE,
 +          compptr->height_in_blocks * DCTSIZE
 +        };
 +
 +        for (by = 0; by < compptr->height_in_blocks;
 +             by += compptr->v_samp_factor) {
 +          JBLOCKARRAY barray = (dinfo->mem->access_virt_barray)
 +            ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor,
 +             TRUE);
 +
 +          for (y = 0; y < compptr->v_samp_factor; y++) {
 +            if (t[i].customFilter(barray[y][0], arrayRegion, planeRegion, ci,
 +                                  i, &t[i]) == -1)
 +              _throw("tjTransform(): Error in custom filter");
 +            arrayRegion.y += DCTSIZE;
 +          }
 +        }
 +      }
 +    }
 +    if (!(t[i].options & TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo);
 +  }
 +
 +  jpeg_finish_decompress(dinfo);
 +
 +bailout:
 +  if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo);
 +  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
 +  if (xinfo) free(xinfo);
 +  if (this->jerr.warning) retval = -1;
 +  this->jerr.stopOnWarning = FALSE;
 +  return retval;
 +}
 +
 +
 +DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width,
 +                                     int align, int *height, int *pixelFormat,
 +                                     int flags)
 +{
 +  int retval = 0, tempc, pitch;
 +  tjhandle handle = NULL;
 +  tjinstance *this;
 +  j_compress_ptr cinfo = NULL;
 +  cjpeg_source_ptr src;
 +  unsigned char *dstBuf = NULL;
 +  FILE *file = NULL;
 +  boolean invert;
 +
 +  if (!filename || !width || align < 1 || !height || !pixelFormat ||
 +      *pixelFormat < TJPF_UNKNOWN || *pixelFormat >= TJ_NUMPF)
 +    _throwg("tjLoadImage(): Invalid argument");
 +  if ((align & (align - 1)) != 0)
 +    _throwg("tjLoadImage(): Alignment must be a power of 2");
 +
 +  if ((handle = tjInitCompress()) == NULL) return NULL;
 +  this = (tjinstance *)handle;
 +  cinfo = &this->cinfo;
 +
 +  if ((file = fopen(filename, "rb")) == NULL)
 +    _throwunix("tjLoadImage(): Cannot open input file");
 +
 +  if ((tempc = getc(file)) < 0 || ungetc(tempc, file) == EOF)
 +    _throwunix("tjLoadImage(): Could not read input file")
 +  else if (tempc == EOF)
 +    _throwg("tjLoadImage(): Input file contains no data");
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  if (*pixelFormat == TJPF_UNKNOWN) cinfo->in_color_space = JCS_UNKNOWN;
 +  else cinfo->in_color_space = pf2cs[*pixelFormat];
 +  if (tempc == 'B') {
 +    if ((src = jinit_read_bmp(cinfo, FALSE)) == NULL)
 +      _throwg("tjLoadImage(): Could not initialize bitmap loader");
 +    invert = (flags & TJFLAG_BOTTOMUP) == 0;
 +  } else if (tempc == 'P') {
 +    if ((src = jinit_read_ppm(cinfo)) == NULL)
 +      _throwg("tjLoadImage(): Could not initialize bitmap loader");
 +    invert = (flags & TJFLAG_BOTTOMUP) != 0;
 +  } else
 +    _throwg("tjLoadImage(): Unsupported file type");
 +
 +  src->input_file = file;
 +  (*src->start_input) (cinfo, src);
 +  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
 +
 +  *width = cinfo->image_width;  *height = cinfo->image_height;
 +  *pixelFormat = cs2pf[cinfo->in_color_space];
 +
 +  pitch = PAD((*width) * tjPixelSize[*pixelFormat], align);
 +  if ((dstBuf = (unsigned char *)malloc(pitch * (*height))) == NULL)
 +    _throwg("tjLoadImage(): Memory allocation failure");
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  while (cinfo->next_scanline < cinfo->image_height) {
 +    int i, nlines = (*src->get_pixel_rows) (cinfo, src);
 +
 +    for (i = 0; i < nlines; i++) {
 +      unsigned char *dstptr;
 +      int row;
 +
 +      row = cinfo->next_scanline + i;
 +      if (invert) dstptr = &dstBuf[((*height) - row - 1) * pitch];
 +      else dstptr = &dstBuf[row * pitch];
 +      memcpy(dstptr, src->buffer[i], (*width) * tjPixelSize[*pixelFormat]);
 +    }
 +    cinfo->next_scanline += nlines;
 +  }
 +
 +  (*src->finish_input) (cinfo, src);
 +
 +bailout:
 +  if (handle) tjDestroy(handle);
 +  if (file) fclose(file);
 +  if (retval < 0 && dstBuf) { free(dstBuf);  dstBuf = NULL; }
 +  return dstBuf;
 +}
 +
 +
 +DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer,
 +                          int width, int pitch, int height, int pixelFormat,
 +                          int flags)
 +{
 +  int retval = 0;
 +  tjhandle handle = NULL;
 +  tjinstance *this;
 +  j_decompress_ptr dinfo = NULL;
 +  djpeg_dest_ptr dst;
 +  FILE *file = NULL;
 +  char *ptr = NULL;
 +  boolean invert;
 +
 +  if (!filename || !buffer || width < 1 || pitch < 0 || height < 1 ||
 +      pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
 +    _throwg("tjSaveImage(): Invalid argument");
 +
 +  if ((handle = tjInitDecompress()) == NULL)
 +    return -1;
 +  this = (tjinstance *)handle;
 +  dinfo = &this->dinfo;
 +
 +  if ((file = fopen(filename, "wb")) == NULL)
 +    _throwunix("tjSaveImage(): Cannot open output file");
 +
 +  if (setjmp(this->jerr.setjmp_buffer)) {
 +    /* If we get here, the JPEG code has signaled an error. */
 +    retval = -1;  goto bailout;
 +  }
 +
 +  this->dinfo.out_color_space = pf2cs[pixelFormat];
 +  dinfo->image_width = width;  dinfo->image_height = height;
 +  dinfo->global_state = DSTATE_READY;
 +  dinfo->scale_num = dinfo->scale_denom = 1;
 +
 +  ptr = strrchr(filename, '.');
 +  if (ptr && !strcasecmp(ptr, ".bmp")) {
 +    if ((dst = jinit_write_bmp(dinfo, FALSE, FALSE)) == NULL)
 +      _throwg("tjSaveImage(): Could not initialize bitmap writer");
 +    invert = (flags & TJFLAG_BOTTOMUP) == 0;
 +  } else {
 +    if ((dst = jinit_write_ppm(dinfo)) == NULL)
 +      _throwg("tjSaveImage(): Could not initialize PPM writer");
 +    invert = (flags & TJFLAG_BOTTOMUP) != 0;
 +  }
 +
 +  dst->output_file = file;
 +  (*dst->start_output) (dinfo, dst);
 +  (*dinfo->mem->realize_virt_arrays) ((j_common_ptr)dinfo);
 +
 +  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
 +
 +  while (dinfo->output_scanline < dinfo->output_height) {
 +    unsigned char *rowptr;
 +
 +    if (invert)
 +      rowptr = &buffer[(height - dinfo->output_scanline - 1) * pitch];
 +    else
 +      rowptr = &buffer[dinfo->output_scanline * pitch];
 +    memcpy(dst->buffer[0], rowptr, width * tjPixelSize[pixelFormat]);
 +    (*dst->put_pixel_rows) (dinfo, dst, 1);
 +    dinfo->output_scanline++;
 +  }
 +
 +  (*dst->finish_output) (dinfo, dst);
 +
 +bailout:
 +  if (handle) tjDestroy(handle);
 +  if (file) fclose(file);
 +  return retval;
  }