From fbb674707e01a61e8b7083faa632dfd46c568b13 Mon Sep 17 00:00:00 2001 From: DRC Date: Wed, 24 Nov 2010 04:02:37 +0000 Subject: [PATCH] Merge the TurboJPEG planar YUV encoding feature from VirtualGL 2.2 git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@305 632fc199-4ca6-4c93-a231-07263d6284db --- CMakeLists.txt | 2 + ChangeLog.txt | 3 + Makefile.am | 1 + jpegut.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++-- jpgtest.cxx | 29 +++++++-- turbojpeg.h | 9 +++ turbojpegl.c | 146 +++++++++++++++++++++++++++++++++++++++---- 7 files changed, 333 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cf86b57..a6c6cd7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,6 +209,7 @@ add_executable(wrjpgcom rdjpgcom.c) enable_testing() add_test(jpegut jpegut) +add_test(jpegut-yuv jpegut -yuv) add_test(cjpeg-int sharedlib/cjpeg -dct int -outfile testoutint.jpg ${CMAKE_SOURCE_DIR}/testorig.ppm) add_test(cjpeg-int-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimgint.jpg testoutint.jpg) add_test(cjpeg-fast sharedlib/cjpeg -dct fast -opt -outfile testoutfst.jpg ${CMAKE_SOURCE_DIR}/testorig.ppm) @@ -251,6 +252,7 @@ add_test(jpegtran-crop sharedlib/jpegtran -crop 120x90+20+50 -transpose -perfect add_test(jpegtran-crop-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimgcrop.jpg testoutcrop.jpg) add_test(jpegut-static jpegut-static) +add_test(jpegut-static-yuv jpegut-static -yuv) add_test(cjpeg-static-int cjpeg-static -dct int -outfile testoutint.jpg ${CMAKE_SOURCE_DIR}/testorig.ppm) add_test(cjpeg-static-int-cmp ${CMAKE_COMMAND} -E compare_files ${CMAKE_SOURCE_DIR}/testimgint.jpg testoutint.jpg) add_test(cjpeg-static-fast cjpeg-static -dct fast -opt -outfile testoutfst.jpg ${CMAKE_SOURCE_DIR}/testorig.ppm) diff --git a/ChangeLog.txt b/ChangeLog.txt index f250c8a..7a7a671 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -21,6 +21,9 @@ when the library is built with libjpeg v6b emulation. 
[7] Added arithmetic encoding and decoding support (can be disabled via configure or CMake options) +[8] TurboJPEG/OSS can now leverage the SIMD-accelerated color conversion +routines in libjpeg-turbo to generate planar YUV images from RGB input. + Significant changes since 1.0.0 =============================== diff --git a/Makefile.am b/Makefile.am index 74df225..2f7e773 100644 --- a/Makefile.am +++ b/Makefile.am @@ -124,6 +124,7 @@ dist-hook: test: testclean all ./jpegut + ./jpegut -yuv ./cjpeg -dct int -outfile testoutint.jpg $(srcdir)/testorig.ppm cmp $(srcdir)/testimgint.jpg testoutint.jpg ./cjpeg -dct fast -opt -outfile testoutfst.jpg $(srcdir)/testorig.ppm diff --git a/jpegut.c b/jpegut.c index db46b6d..3344ca8 100644 --- a/jpegut.c +++ b/jpegut.c @@ -1,6 +1,6 @@ /* Copyright (C)2004 Landmark Graphics Corporation * Copyright (C)2005 Sun Microsystems, Inc. - * Copyright (C)2009 D. R. Commander + * Copyright (C)2009-2010 D. R. Commander * * This library is free software and may be redistributed and/or modified under * the terms of the wxWindows Library License, Version 3.1 or (at your option) @@ -18,11 +18,18 @@ #include #include "./rrtimer.h" #include "./turbojpeg.h" +#ifndef _WIN32 + #define stricmp strcasecmp +#endif #define _catch(f) {if((f)==-1) {printf("TJPEG: %s\n", tjGetErrorStr()); bailout();}} const char *_subnamel[NUMSUBOPT]={"4:4:4", "4:2:2", "4:2:0", "GRAY"}; const char *_subnames[NUMSUBOPT]={"444", "422", "420", "GRAY"}; +const int _hsf[NUMSUBOPT]={1, 2, 2, 1}; +const int _vsf[NUMSUBOPT]={1, 1, 2, 1}; + +int yuv=0; int exitstatus=0; #define bailout() {exitstatus=-1; goto finally;} @@ -197,6 +204,126 @@ int checkbuf(unsigned char *buf, int w, int h, int ps, int subsamp, int flags) return 1; } +#define checkval(v, cv) { \ + if(vcv+1) { \ + printf("\nComp. %s at %d,%d should be %d, not %d\n", #v, i, j, cv, v); \ + retval=0; goto bailout; \ + }} + +#define checkval0(v) { \ + if(v>1) { \ + printf("\nComp. 
%s at %d,%d should be 0, not %d\n", #v, i, j, v); \ + retval=0; goto bailout; \ + }} + +#define checkval255(v) { \ + if(v<254 && !(v==217 && i==0 && j==21)) { \ + printf("\nComp. %s at %d,%d should be 255, not %d\n", #v, i, j, v); \ + retval=0; goto bailout; \ + }} + +#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) + +int checkbufyuv(unsigned char *buf, unsigned long size, int w, int h, + int subsamp) +{ + int i, j; + int hsf=_hsf[subsamp], vsf=_vsf[subsamp]; + int pw=PAD(w, hsf), ph=PAD(h, vsf); + int cw=pw/hsf, ch=ph/vsf; + int ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4); + int retval=1; + unsigned long correctsize=ypitch*ph + (subsamp==TJ_GRAYSCALE? 0:uvpitch*ch*2); + + if(size!=correctsize) + { + printf("\nIncorrect size %lu. Should be %lu\n", size, correctsize); + retval=0; goto bailout; + } + + for(i=0; i<16; i++) + { + for(j=0; j %s Q%d ... ", pixformat, - (flags&TJ_BOTTOMUP)?"Bottom-Up":"Top-Down ", _subnamel[subsamp], qual); + if(yuv) + printf("%s %s -> %s YUV ... ", pixformat, + (flags&TJ_BOTTOMUP)?"Bottom-Up":"Top-Down ", _subnamel[subsamp]); + else + printf("%s %s -> %s Q%d ... ", pixformat, + (flags&TJ_BOTTOMUP)?"Bottom-Up":"Top-Down ", _subnamel[subsamp], qual); if((bmpbuf=(unsigned char *)malloc(w*h*ps+1))==NULL) { @@ -246,10 +379,20 @@ void gentestjpeg(tjhandle hnd, unsigned char *jpegbuf, unsigned long *size, _catch(tjCompress(hnd, bmpbuf, w, 0, h, ps, jpegbuf, size, subsamp, qual, flags)); t=rrtime()-t; - sprintf(tempstr, "%s_enc_%s_%s_%sQ%d.jpg", basefilename, pixformat, - (flags&TJ_BOTTOMUP)? "BU":"TD", _subnames[subsamp], qual); + if(yuv) + sprintf(tempstr, "%s_enc_%s_%s_%s.yuv", basefilename, pixformat, + (flags&TJ_BOTTOMUP)? "BU":"TD", _subnames[subsamp]); + else + sprintf(tempstr, "%s_enc_%s_%s_%sQ%d.jpg", basefilename, pixformat, + (flags&TJ_BOTTOMUP)? "BU":"TD", _subnames[subsamp], qual); writejpeg(jpegbuf, *size, tempstr); - printf("Done. 
%f ms\n Result in %s\n", t*1000., tempstr); + if(yuv) + { + if(checkbufyuv(jpegbuf, *size, w, h, subsamp)) printf("Passed."); + else printf("FAILED!"); + } + else printf("Done."); + printf(" %f ms\n Result in %s\n", t*1000., tempstr); finally: if(bmpbuf) free(bmpbuf); @@ -261,6 +404,8 @@ void gentestbmp(tjhandle hnd, unsigned char *jpegbuf, unsigned long jpegsize, unsigned char *bmpbuf=NULL; const char *pixformat; int _w=0, _h=0; double t; + if(yuv) return; + if(flags&TJ_BGR) { if(ps==3) pixformat="BGR"; @@ -404,8 +549,16 @@ void dotest1(void) int main(int argc, char *argv[]) { + if(argc>1 && !stricmp(argv[1], "-yuv")) yuv=1; dotest(35, 41, 3, TJ_444, "test"); dotest(35, 41, 4, TJ_444, "test"); + if(yuv) + { + dotest(35, 41, 3, TJ_422, "test"); + dotest(35, 41, 4, TJ_422, "test"); + dotest(35, 41, 3, TJ_420, "test"); + dotest(35, 41, 4, TJ_420, "test"); + } dotest(35, 41, 1, TJ_GRAYSCALE, "test"); dotest(35, 41, 3, TJ_GRAYSCALE, "test"); dotest(35, 41, 4, TJ_GRAYSCALE, "test"); diff --git a/jpgtest.cxx b/jpgtest.cxx index 79de5cf..96e5173 100644 --- a/jpgtest.cxx +++ b/jpgtest.cxx @@ -30,7 +30,7 @@ #define _throwbmp(m) _throw(m, bmpgeterr()) int forcemmx=0, forcesse=0, forcesse2=0, forcesse3=0, fastupsample=0, - decomponly=0; + decomponly=0, yuv=0; const int _ps[BMPPIXELFORMATS]={3, 4, 3, 4, 4, 4}; const int _flags[BMPPIXELFORMATS]={0, 0, TJ_BGR, TJ_BGR, TJ_BGR|TJ_ALPHAFIRST, TJ_ALPHAFIRST}; @@ -76,12 +76,20 @@ void dotest(unsigned char *srcbuf, int w, int h, BMPPIXELFORMAT pf, int bu, flags |= _flags[pf]; if(bu) flags |= TJ_BOTTOMUP; + if(yuv) flags |= TJ_YUV; if((rgbbuf=(unsigned char *)malloc(pitch*h)) == NULL) _throwunix("allocating image buffer"); - if(!quiet) printf("\n>>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", _pfname[pf], - bu?"Bottom-up":"Top-down", _subnamel[jpegsub], qual); + if(!quiet) + { + if(yuv) + printf("\n>>>>> %s (%s) <--> YUV %s <<<<<\n", _pfname[pf], + bu?"Bottom-up":"Top-down", _subnamel[jpegsub]); + else + printf("\n>>>>> %s (%s) <--> JPEG %s 
Q%d <<<<<\n", _pfname[pf], + bu?"Bottom-up":"Top-down", _subnamel[jpegsub], qual); + } if(dotile) {tilesizex=tilesizey=4;} else {tilesizex=w; tilesizey=h;} do @@ -155,7 +163,10 @@ void dotest(unsigned char *srcbuf, int w, int h, BMPPIXELFORMAT pf, int bu, } if(tilesizex==w && tilesizey==h) { - sprintf(tempstr, "%s_%sQ%d.jpg", filename, _subnames[jpegsub], qual); + if(yuv) + sprintf(tempstr, "%s_%s.yuv", filename, _subnames[jpegsub]); + else + sprintf(tempstr, "%s_%sQ%d.jpg", filename, _subnames[jpegsub], qual); if((outfile=fopen(tempstr, "wb"))==NULL) _throwunix("opening reference image"); if(fwrite(jpegbuf[0], jpgbufsize, 1, outfile)!=1) @@ -163,6 +174,7 @@ void dotest(unsigned char *srcbuf, int w, int h, BMPPIXELFORMAT pf, int bu, fclose(outfile); if(!quiet) printf("Reference image written to %s\n", tempstr); } + if(yuv) goto bailout; // Decompression test memset(rgbbuf, 127, pitch*h); // Grey image means decompressor did nothing @@ -371,7 +383,9 @@ void usage(char *progname) printf(" Use fast, inaccurate upsampling code to perform 4:2:2 and 4:2:0\n"); printf(" YUV decoding in libjpeg decompressor\n\n"); printf(" [-quiet]\n"); - printf(" Output in tabular rather than verbose format\n\n"); + printf(" Output in tabular rather than verbose format\n"); + printf(" [-yuv]\n"); + printf(" Encode RGB input as planar YUV rather than compressing as JPEG\n\n"); printf(" NOTE: If the quality is specified as a range, i.e. 
90-100, a separate\n"); printf(" test will be performed for all quality values in the range.\n"); exit(1); @@ -441,6 +455,11 @@ int main(int argc, char *argv[]) printf("Using fast upsampling code\n"); fastupsample=1; } + if(!stricmp(argv[i], "-yuv")) + { + printf("Testing YUV planar encoding\n"); + yuv=1; + } if(!stricmp(argv[i], "-rgb")) pf=BMP_RGB; if(!stricmp(argv[i], "-rgba")) pf=BMP_RGBA; if(!stricmp(argv[i], "-bgr")) pf=BMP_BGR; diff --git a/turbojpeg.h b/turbojpeg.h index e382e71..b4bbd2d 100644 --- a/turbojpeg.h +++ b/turbojpeg.h @@ -53,6 +53,15 @@ enum {TJ_444=0, TJ_422, TJ_420, TJ_GRAYSCALE}; #define TJ_FASTUPSAMPLE 256 /* Use fast, inaccurate 4:2:2 and 4:2:0 YUV upsampling routines (libjpeg version only) */ +#define TJ_YUV 512 + /* Use the TurboJPEG YUV encoder to produce a planar YUV image that is + suitable for X Video. Specifically, if either the width or the height is + subsampled, then that dimension is padded to a multiple of 2 in the output image. Also, + each line of each plane in the output image is padded to a multiple of 4 bytes. + Although this will work with any subsampling option, it is really only + useful in combination with TJ_420, which produces an image compatible + with the I420 format. 
*/ + typedef void* tjhandle; #define TJPAD(p) (((p)+3)&(~3)) diff --git a/turbojpegl.c b/turbojpegl.c index 03833c5..1d719cb 100644 --- a/turbojpegl.c +++ b/turbojpegl.c @@ -18,10 +18,35 @@ #include #include #include +#define JPEG_INTERNALS #include #include #include #include "./turbojpeg.h" +#ifdef sun +#include +#endif + +void *__memalign(size_t boundary, size_t size) +{ + #if defined(_WIN32) || defined(__APPLE__) + return malloc(size); + #else + #ifdef sun + return memalign(boundary, size); + #else + void *ptr=NULL; + posix_memalign(&ptr, boundary, size); + return ptr; + #endif + #endif +} + +#ifndef min + #define min(a,b) ((a)<(b)?(a):(b)) +#endif + +#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) // Error handling @@ -117,9 +142,18 @@ DLLEXPORT int DLLCALL tjCompress(tjhandle h, int jpegsub, int qual, int flags) { int i; JSAMPROW *row_pointer=NULL; + JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *outbuf[MAX_COMPONENTS]; checkhandle(h); + for(i=0; i=NUMSUBOPT || qual<0 || qual>100) @@ -158,6 +192,14 @@ DLLEXPORT int DLLCALL tjCompress(tjhandle h, if(setjmp(j->jerr.jb)) { // this will execute if LIBJPEG has an error if(row_pointer) free(row_pointer); + for(i=0; ijdms.next_output_byte = dstbuf; j->jdms.free_in_buffer = TJBUFSIZE(j->cinfo.image_width, j->cinfo.image_height); - if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL) - _throw("Memory allocation failed in tjInitCompress()"); - for(i=0; icinfo, TRUE); + if(flags&TJ_YUV) { - if(flags&TJ_BOTTOMUP) row_pointer[i]= &srcbuf[(height-i-1)*pitch]; - else row_pointer[i]= &srcbuf[i*pitch]; + j_compress_ptr cinfo=&j->cinfo; + int row; + int pw=PAD(width, cinfo->max_h_samp_factor); + int ph=PAD(height, cinfo->max_v_samp_factor); + int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS]; + jpeg_component_info *compptr; + JSAMPLE *ptr=dstbuf; unsigned long yuvsize=0; + + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph))==NULL) + 
_throw("Memory allocation failed in tjCompress()"); + for(i=0; inum_components; i++) + { + compptr=&cinfo->comp_info[i]; + _tmpbuf[i]=(JSAMPLE *)__memalign(16, + PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) + /compptr->h_samp_factor, 16) * cinfo->max_v_samp_factor); + if(!_tmpbuf[i]) _throw("Memory allocation failure"); + tmpbuf[i]=(JSAMPROW *)__memalign(16, + sizeof(JSAMPROW)*cinfo->max_v_samp_factor); + if(!tmpbuf[i]) _throw("Memory allocation failure"); + for(row=0; rowmax_v_samp_factor; row++) + tmpbuf[i][row]=&_tmpbuf[i][ + PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) + /compptr->h_samp_factor, 16) * row]; + _tmpbuf2[i]=(JSAMPLE *)__memalign(16, + PAD(compptr->width_in_blocks*DCTSIZE, 16) * compptr->v_samp_factor); + if(!_tmpbuf2[i]) _throw("Memory allocation failure"); + tmpbuf2[i]=(JSAMPROW *)__memalign(16, + sizeof(JSAMPROW)*compptr->v_samp_factor); + if(!tmpbuf2[i]) _throw("Memory allocation failure"); + for(row=0; rowv_samp_factor; row++) + tmpbuf2[i][row]=&_tmpbuf2[i][ + PAD(compptr->width_in_blocks*DCTSIZE, 16) * row]; + cw[i]=pw*compptr->h_samp_factor/cinfo->max_h_samp_factor; + ch[i]=ph*compptr->v_samp_factor/cinfo->max_v_samp_factor; + outbuf[i]=(JSAMPROW *)__memalign(16, sizeof(JSAMPROW)*ch[i]); + if(!outbuf[i]) _throw("Memory allocation failure"); + for(row=0; rowmax_v_samp_factor) + { + (*cinfo->cconvert->color_convert)(cinfo, &row_pointer[row], tmpbuf, + 0, cinfo->max_v_samp_factor); + (cinfo->downsample->downsample)(cinfo, tmpbuf, 0, tmpbuf2, 0); + for(i=0, compptr=cinfo->comp_info; inum_components; + i++, compptr++) + jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i], + row*compptr->v_samp_factor/cinfo->max_v_samp_factor, + compptr->v_samp_factor, cw[i]); + } + *size=yuvsize; + cinfo->next_scanline+=height; } - jpeg_start_compress(&j->cinfo, TRUE); - while(j->cinfo.next_scanlinecinfo.image_height) + else { - jpeg_write_scanlines(&j->cinfo, &row_pointer[j->cinfo.next_scanline], - 
j->cinfo.image_height-j->cinfo.next_scanline); + if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL) + _throw("Memory allocation failed in tjCompress()"); + for(i=0; icinfo.next_scanlinecinfo.image_height) + { + jpeg_write_scanlines(&j->cinfo, &row_pointer[j->cinfo.next_scanline], + j->cinfo.image_height-j->cinfo.next_scanline); + } } jpeg_finish_compress(&j->cinfo); - *size=TJBUFSIZE(j->cinfo.image_width, j->cinfo.image_height) - -(unsigned long)(j->jdms.free_in_buffer); + if(!(flags&TJ_YUV)) + *size=TJBUFSIZE(j->cinfo.image_width, j->cinfo.image_height) + -(unsigned long)(j->jdms.free_in_buffer); if(row_pointer) free(row_pointer); + for(i=0; idinfo.do_fancy_upsampling=FALSE; jpeg_start_decompress(&j->dinfo); -- 2.40.0