granicus.if.org Git - libvpx/blob - y4minput.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  *
  10  *  Based on code from the OggTheora software codec source code,
  11  *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
  12  */
  13 #include <errno.h>
  14 #include <stdlib.h>
  15 #include <string.h>
  16
  17 #include "vpx/vpx_integer.h"
  18 #include "y4minput.h"
  19
  20 // Reads 'size' bytes from 'file' into 'buf' with some fault tolerance.
  21 // Returns true on success.
  22 static int file_read(void *buf, size_t size, FILE *file) {
  23   const int kMaxRetries = 5;
  24   int retry_count = 0;
  25   size_t len = 0;
  26   do {
  27     const size_t n = fread((uint8_t*)buf + len, 1, size - len, file);
  28     len += n;
  29     if (ferror(file)) {
  30       if (errno == EINTR || errno == EAGAIN) {
  31         ++retry_count;
  32         clearerr(file);
  33         continue;
  34       } else {
  35         fprintf(stderr, "Error reading file: %u of %u bytes read, %d: %s\n",
  36                 (uint32_t)len, (uint32_t)size, errno, strerror(errno));
  37         return 0;
  38       }
  39     }
  40   } while (!feof(file) && len < size && retry_count < kMaxRetries);
  41   return len == size;
  42 }
  43
  44 static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
  45   int   got_w;
  46   int   got_h;
  47   int   got_fps;
  48   int   got_interlace;
  49   int   got_par;
  50   int   got_chroma;
  51   char *p;
  52   char *q;
  53   got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
  54   for (p = _tags;; p = q) {
  55     /*Skip any leading spaces.*/
  56     while (*p == ' ')p++;
  57     /*If that's all we have, stop.*/
  58     if (p[0] == '\0')break;
  59     /*Find the end of this tag.*/
  60     for (q = p + 1; *q != '\0' && *q != ' '; q++);
  61     /*Process the tag.*/
  62     switch (p[0]) {
  63       case 'W': {
  64         if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1)return -1;
  65         got_w = 1;
  66       }
  67       break;
  68       case 'H': {
  69         if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1)return -1;
  70         got_h = 1;
  71       }
  72       break;
  73       case 'F': {
  74         if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
  75           return -1;
  76         }
  77         got_fps = 1;
  78       }
  79       break;
  80       case 'I': {
  81         _y4m->interlace = p[1];
  82         got_interlace = 1;
  83       }
  84       break;
  85       case 'A': {
  86         if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
  87           return -1;
  88         }
  89         got_par = 1;
  90       }
  91       break;
  92       case 'C': {
  93         if (q - p > 16)return -1;
  94         memcpy(_y4m->chroma_type, p + 1, q - p - 1);
  95         _y4m->chroma_type[q - p - 1] = '\0';
  96         got_chroma = 1;
  97       }
  98       break;
  99       /*Ignore unknown tags.*/
 100     }
 101   }
 102   if (!got_w || !got_h || !got_fps)return -1;
 103   if (!got_interlace)_y4m->interlace = '?';
 104   if (!got_par)_y4m->par_n = _y4m->par_d = 0;
 105   /*Chroma-type is not specified in older files, e.g., those generated by
 106      mplayer.*/
 107   if (!got_chroma)strcpy(_y4m->chroma_type, "420");
 108   return 0;
 109 }
 110
 111
 112
 113 /*All anti-aliasing filters in the following conversion functions are based on
 114    one of two window functions:
 115   The 6-tap Lanczos window (for down-sampling and shifts):
 116    sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
 117    0,                         |t|>=3
 118   The 4-tap Mitchell window (for up-sampling):
 119    7|t|^3-12|t|^2+16/3,             |t|<1
 120    -(7/3)|t|^3+12|t|^2-20|t|+32/3,  |t|<2
 121    0,                               |t|>=2
 122   The number of taps is intentionally kept small to reduce computational
 123    overhead and limit ringing.
 124
 125   The taps from these filters are scaled so that their sum is 1, and the result
 126    is scaled by 128 and rounded to integers to create a filter whose
 127    intermediate values fit inside 16 bits.
 128   Coefficients are rounded in such a way as to ensure their sum is still 128,
 129    which is usually equivalent to normal rounding.
 130
 131   Conversions which require both horizontal and vertical filtering could
 132    have these steps pipelined, for less memory consumption and better cache
 133    performance, but we do them separately for simplicity.*/
 134
 135 #define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
 136 #define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
 137 #define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
 138
 139 /*420jpeg chroma samples are sited like:
 140   Y-------Y-------Y-------Y-------
 141   |       |       |       |
 142   |   BR  |       |   BR  |
 143   |       |       |       |
 144   Y-------Y-------Y-------Y-------
 145   |       |       |       |
 146   |       |       |       |
 147   |       |       |       |
 148   Y-------Y-------Y-------Y-------
 149   |       |       |       |
 150   |   BR  |       |   BR  |
 151   |       |       |       |
 152   Y-------Y-------Y-------Y-------
 153   |       |       |       |
 154   |       |       |       |
 155   |       |       |       |
 156
 157   420mpeg2 chroma samples are sited like:
 158   Y-------Y-------Y-------Y-------
 159   |       |       |       |
 160   BR      |       BR      |
 161   |       |       |       |
 162   Y-------Y-------Y-------Y-------
 163   |       |       |       |
 164   |       |       |       |
 165   |       |       |       |
 166   Y-------Y-------Y-------Y-------
 167   |       |       |       |
 168   BR      |       BR      |
 169   |       |       |       |
 170   Y-------Y-------Y-------Y-------
 171   |       |       |       |
 172   |       |       |       |
 173   |       |       |       |
 174
 175   We use a resampling filter to shift the site locations one quarter pixel (at
 176    the chroma plane's resolution) to the right.
 177   The 4:2:2 modes look exactly the same, except there are twice as many chroma
 178    lines, and they are vertically co-sited with the luma samples in both the
 179    mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
 180 static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
 181                                         const unsigned char *_src, int _c_w, int _c_h) {
 182   int y;
 183   int x;
 184   for (y = 0; y < _c_h; y++) {
 185     /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
 186        window.*/
 187     for (x = 0; x < OC_MINI(_c_w, 2); x++) {
 188       _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] +
 189                                              114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
 190                                              _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7, 255);
 191     }
 192     for (; x < _c_w - 3; x++) {
 193       _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
 194                                              114 * _src[x] + 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7, 255);
 195     }
 196     for (; x < _c_w; x++) {
 197       _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
 198                                              114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
 199                                              _src[_c_w - 1] + 64) >> 7, 255);
 200     }
 201     _dst += _c_w;
 202     _src += _c_w;
 203   }
 204 }
 205
 206 /*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
 207 static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
 208                                          unsigned char *_aux) {
 209   int c_w;
 210   int c_h;
 211   int c_sz;
 212   int pli;
 213   /*Skip past the luma data.*/
 214   _dst += _y4m->pic_w * _y4m->pic_h;
 215   /*Compute the size of each chroma plane.*/
 216   c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 217   c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 218   c_sz = c_w * c_h;
 219   for (pli = 1; pli < 3; pli++) {
 220     y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
 221     _dst += c_sz;
 222     _aux += c_sz;
 223   }
 224 }
 225
 226 /*This format is only used for interlaced content, but is included for
 227    completeness.
 228
 229   420jpeg chroma samples are sited like:
 230   Y-------Y-------Y-------Y-------
 231   |       |       |       |
 232   |   BR  |       |   BR  |
 233   |       |       |       |
 234   Y-------Y-------Y-------Y-------
 235   |       |       |       |
 236   |       |       |       |
 237   |       |       |       |
 238   Y-------Y-------Y-------Y-------
 239   |       |       |       |
 240   |   BR  |       |   BR  |
 241   |       |       |       |
 242   Y-------Y-------Y-------Y-------
 243   |       |       |       |
 244   |       |       |       |
 245   |       |       |       |
 246
 247   420paldv chroma samples are sited like:
 248   YR------Y-------YR------Y-------
 249   |       |       |       |
 250   |       |       |       |
 251   |       |       |       |
 252   YB------Y-------YB------Y-------
 253   |       |       |       |
 254   |       |       |       |
 255   |       |       |       |
 256   YR------Y-------YR------Y-------
 257   |       |       |       |
 258   |       |       |       |
 259   |       |       |       |
 260   YB------Y-------YB------Y-------
 261   |       |       |       |
 262   |       |       |       |
 263   |       |       |       |
 264
 265   We use a resampling filter to shift the site locations one quarter pixel (at
 266    the chroma plane's resolution) to the right.
 267   Then we use another filter to move the C_r location down one quarter pixel,
 268    and the C_b location up one quarter pixel.*/
 269 static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
 270                                          unsigned char *_aux) {
 271   unsigned char *tmp;
 272   int            c_w;
 273   int            c_h;
 274   int            c_sz;
 275   int            pli;
 276   int            y;
 277   int            x;
 278   /*Skip past the luma data.*/
 279   _dst += _y4m->pic_w * _y4m->pic_h;
 280   /*Compute the size of each chroma plane.*/
 281   c_w = (_y4m->pic_w + 1) / 2;
 282   c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 283   c_sz = c_w * c_h;
 284   tmp = _aux + 2 * c_sz;
 285   for (pli = 1; pli < 3; pli++) {
 286     /*First do the horizontal re-sampling.
 287       This is the same as the mpeg2 case, except that after the horizontal
 288        case, we need to apply a second vertical filter.*/
 289     y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
 290     _aux += c_sz;
 291     switch (pli) {
 292       case 1: {
 293         /*Slide C_b up a quarter-pel.
 294           This is the same filter used above, but in the other order.*/
 295         for (x = 0; x < c_w; x++) {
 296           for (y = 0; y < OC_MINI(c_h, 3); y++) {
 297             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[0]
 298                                                          - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w]
 299                                                          + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
 300                                                          + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255);
 301           }
 302           for (; y < c_h - 2; y++) {
 303             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
 304                                                          - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
 305                                                          - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255);
 306           }
 307           for (; y < c_h; y++) {
 308             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
 309                                                          - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
 310                                                          - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
 311           }
 312           _dst++;
 313           tmp++;
 314         }
 315         _dst += c_sz - c_w;
 316         tmp -= c_w;
 317       }
 318       break;
 319       case 2: {
 320         /*Slide C_r down a quarter-pel.
 321           This is the same as the horizontal filter.*/
 322         for (x = 0; x < c_w; x++) {
 323           for (y = 0; y < OC_MINI(c_h, 2); y++) {
 324             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[0]
 325                                                          - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w]
 326                                                          + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w]
 327                                                          + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255);
 328           }
 329           for (; y < c_h - 3; y++) {
 330             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
 331                                                          - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w]
 332                                                          - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255);
 333           }
 334           for (; y < c_h; y++) {
 335             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
 336                                                          - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
 337                                                          - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
 338           }
 339           _dst++;
 340           tmp++;
 341         }
 342       }
 343       break;
 344     }
 345     /*For actual interlaced material, this would have to be done separately on
 346        each field, and the shift amounts would be different.
 347       C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
 348        C_b up 1/8 in the bottom field.
 349       The corresponding filters would be:
 350        Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
 351        Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
 352   }
 353 }
 354
 355 /*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
 356   This is used as a helper by several converation routines.*/
 357 static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
 358                                        const unsigned char *_src, int _c_w, int _c_h) {
 359   int y;
 360   int x;
 361   /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
 362   for (x = 0; x < _c_w; x++) {
 363     for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
 364       _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (64 * _src[0]
 365                                           + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w]
 366                                           - 17 * _src[OC_MINI(2, _c_h - 1) * _c_w]
 367                                           + 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255);
 368     }
 369     for (; y < _c_h - 3; y += 2) {
 370       _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w])
 371                                           - 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w])
 372                                           + 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255);
 373     }
 374     for (; y < _c_h; y += 2) {
 375       _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w]
 376                                                + _src[(_c_h - 1) * _c_w]) - 17 * (_src[(y - 1) * _c_w]
 377                                                                                   + _src[OC_MINI(y + 2, _c_h - 1) * _c_w])
 378                                           + 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) + 64) >> 7, 255);
 379     }
 380     _src++;
 381     _dst++;
 382   }
 383 }
 384
 385 /*420jpeg chroma samples are sited like:
 386   Y-------Y-------Y-------Y-------
 387   |       |       |       |
 388   |   BR  |       |   BR  |
 389   |       |       |       |
 390   Y-------Y-------Y-------Y-------
 391   |       |       |       |
 392   |       |       |       |
 393   |       |       |       |
 394   Y-------Y-------Y-------Y-------
 395   |       |       |       |
 396   |   BR  |       |   BR  |
 397   |       |       |       |
 398   Y-------Y-------Y-------Y-------
 399   |       |       |       |
 400   |       |       |       |
 401   |       |       |       |
 402
 403   422jpeg chroma samples are sited like:
 404   Y---BR--Y-------Y---BR--Y-------
 405   |       |       |       |
 406   |       |       |       |
 407   |       |       |       |
 408   Y---BR--Y-------Y---BR--Y-------
 409   |       |       |       |
 410   |       |       |       |
 411   |       |       |       |
 412   Y---BR--Y-------Y---BR--Y-------
 413   |       |       |       |
 414   |       |       |       |
 415   |       |       |       |
 416   Y---BR--Y-------Y---BR--Y-------
 417   |       |       |       |
 418   |       |       |       |
 419   |       |       |       |
 420
 421   We use a resampling filter to decimate the chroma planes by two in the
 422    vertical direction.*/
 423 static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 424                                         unsigned char *_aux) {
 425   int c_w;
 426   int c_h;
 427   int c_sz;
 428   int dst_c_w;
 429   int dst_c_h;
 430   int dst_c_sz;
 431   int pli;
 432   /*Skip past the luma data.*/
 433   _dst += _y4m->pic_w * _y4m->pic_h;
 434   /*Compute the size of each chroma plane.*/
 435   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 436   c_h = _y4m->pic_h;
 437   dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 438   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 439   c_sz = c_w * c_h;
 440   dst_c_sz = dst_c_w * dst_c_h;
 441   for (pli = 1; pli < 3; pli++) {
 442     y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
 443     _aux += c_sz;
 444     _dst += dst_c_sz;
 445   }
 446 }
 447
 448 /*420jpeg chroma samples are sited like:
 449   Y-------Y-------Y-------Y-------
 450   |       |       |       |
 451   |   BR  |       |   BR  |
 452   |       |       |       |
 453   Y-------Y-------Y-------Y-------
 454   |       |       |       |
 455   |       |       |       |
 456   |       |       |       |
 457   Y-------Y-------Y-------Y-------
 458   |       |       |       |
 459   |   BR  |       |   BR  |
 460   |       |       |       |
 461   Y-------Y-------Y-------Y-------
 462   |       |       |       |
 463   |       |       |       |
 464   |       |       |       |
 465
 466   422 chroma samples are sited like:
 467   YBR-----Y-------YBR-----Y-------
 468   |       |       |       |
 469   |       |       |       |
 470   |       |       |       |
 471   YBR-----Y-------YBR-----Y-------
 472   |       |       |       |
 473   |       |       |       |
 474   |       |       |       |
 475   YBR-----Y-------YBR-----Y-------
 476   |       |       |       |
 477   |       |       |       |
 478   |       |       |       |
 479   YBR-----Y-------YBR-----Y-------
 480   |       |       |       |
 481   |       |       |       |
 482   |       |       |       |
 483
 484   We use a resampling filter to shift the original site locations one quarter
 485    pixel (at the original chroma resolution) to the right.
 486   Then we use a second resampling filter to decimate the chroma planes by two
 487    in the vertical direction.*/
 488 static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 489                                     unsigned char *_aux) {
 490   unsigned char *tmp;
 491   int            c_w;
 492   int            c_h;
 493   int            c_sz;
 494   int            dst_c_h;
 495   int            dst_c_sz;
 496   int            pli;
 497   /*Skip past the luma data.*/
 498   _dst += _y4m->pic_w * _y4m->pic_h;
 499   /*Compute the size of each chroma plane.*/
 500   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 501   c_h = _y4m->pic_h;
 502   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 503   c_sz = c_w * c_h;
 504   dst_c_sz = c_w * dst_c_h;
 505   tmp = _aux + 2 * c_sz;
 506   for (pli = 1; pli < 3; pli++) {
 507     /*In reality, the horizontal and vertical steps could be pipelined, for
 508        less memory consumption and better cache performance, but we do them
 509        separately for simplicity.*/
 510     /*First do horizontal filtering (convert to 422jpeg)*/
 511     y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
 512     /*Now do the vertical filtering.*/
 513     y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
 514     _aux += c_sz;
 515     _dst += dst_c_sz;
 516   }
 517 }
 518
 519 /*420jpeg chroma samples are sited like:
 520   Y-------Y-------Y-------Y-------
 521   |       |       |       |
 522   |   BR  |       |   BR  |
 523   |       |       |       |
 524   Y-------Y-------Y-------Y-------
 525   |       |       |       |
 526   |       |       |       |
 527   |       |       |       |
 528   Y-------Y-------Y-------Y-------
 529   |       |       |       |
 530   |   BR  |       |   BR  |
 531   |       |       |       |
 532   Y-------Y-------Y-------Y-------
 533   |       |       |       |
 534   |       |       |       |
 535   |       |       |       |
 536
 537   411 chroma samples are sited like:
 538   YBR-----Y-------Y-------Y-------
 539   |       |       |       |
 540   |       |       |       |
 541   |       |       |       |
 542   YBR-----Y-------Y-------Y-------
 543   |       |       |       |
 544   |       |       |       |
 545   |       |       |       |
 546   YBR-----Y-------Y-------Y-------
 547   |       |       |       |
 548   |       |       |       |
 549   |       |       |       |
 550   YBR-----Y-------Y-------Y-------
 551   |       |       |       |
 552   |       |       |       |
 553   |       |       |       |
 554
 555   We use a filter to resample at site locations one eighth pixel (at the source
 556    chroma plane's horizontal resolution) and five eighths of a pixel to the
 557    right.
 558   Then we use another filter to decimate the planes by 2 in the vertical
 559    direction.*/
 560 static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 561                                     unsigned char *_aux) {
 562   unsigned char *tmp;
 563   int            c_w;
 564   int            c_h;
 565   int            c_sz;
 566   int            dst_c_w;
 567   int            dst_c_h;
 568   int            dst_c_sz;
 569   int            tmp_sz;
 570   int            pli;
 571   int            y;
 572   int            x;
 573   /*Skip past the luma data.*/
 574   _dst += _y4m->pic_w * _y4m->pic_h;
 575   /*Compute the size of each chroma plane.*/
 576   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 577   c_h = _y4m->pic_h;
 578   dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 579   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 580   c_sz = c_w * c_h;
 581   dst_c_sz = dst_c_w * dst_c_h;
 582   tmp_sz = dst_c_w * c_h;
 583   tmp = _aux + 2 * c_sz;
 584   for (pli = 1; pli < 3; pli++) {
 585     /*In reality, the horizontal and vertical steps could be pipelined, for
 586        less memory consumption and better cache performance, but we do them
 587        separately for simplicity.*/
 588     /*First do horizontal filtering (convert to 422jpeg)*/
 589     for (y = 0; y < c_h; y++) {
 590       /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
 591          4-tap Mitchell window.*/
 592       for (x = 0; x < OC_MINI(c_w, 1); x++) {
 593         tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (111 * _aux[0]
 594                                                    + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
 595         tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (47 * _aux[0]
 596                                                        + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
 597       }
 598       for (; x < c_w - 2; x++) {
 599         tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
 600                                                    + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255);
 601         tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
 602                                                        + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255);
 603       }
 604       for (; x < c_w; x++) {
 605         tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
 606                                                    + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255);
 607         if ((x << 1 | 1) < dst_c_w) {
 608           tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
 609                                                          + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255);
 610         }
 611       }
 612       tmp += dst_c_w;
 613       _aux += c_w;
 614     }
 615     tmp -= tmp_sz;
 616     /*Now do the vertical filtering.*/
 617     y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
 618     _dst += dst_c_sz;
 619   }
 620 }
 621
 622 /*Convert 444 to 420jpeg.*/
 623 static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 624                                     unsigned char *_aux) {
 625   unsigned char *tmp;
 626   int            c_w;
 627   int            c_h;
 628   int            c_sz;
 629   int            dst_c_w;
 630   int            dst_c_h;
 631   int            dst_c_sz;
 632   int            tmp_sz;
 633   int            pli;
 634   int            y;
 635   int            x;
 636   /*Skip past the luma data.*/
 637   _dst += _y4m->pic_w * _y4m->pic_h;
 638   /*Compute the size of each chroma plane.*/
 639   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 640   c_h = _y4m->pic_h;
 641   dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 642   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 643   c_sz = c_w * c_h;
 644   dst_c_sz = dst_c_w * dst_c_h;
 645   tmp_sz = dst_c_w * c_h;
 646   tmp = _aux + 2 * c_sz;
 647   for (pli = 1; pli < 3; pli++) {
 648     /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
 649     for (y = 0; y < c_h; y++) {
 650       for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
 651         tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)]
 652                                     - 17 * _aux[OC_MINI(2, c_w - 1)]
 653                                     + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255);
 654       }
 655       for (; x < c_w - 3; x += 2) {
 656         tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3])
 657                                     - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255);
 658       }
 659       for (; x < c_w; x += 2) {
 660         tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
 661                                     17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
 662                                     78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255);
 663       }
 664       tmp += dst_c_w;
 665       _aux += c_w;
 666     }
 667     tmp -= tmp_sz;
 668     /*Now do the vertical filtering.*/
 669     y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
 670     _dst += dst_c_sz;
 671   }
 672 }
 673
 674 /*The image is padded with empty chroma components at 4:2:0.*/
 675 static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 676                                      unsigned char *_aux) {
 677   int c_sz;
 678   _dst += _y4m->pic_w * _y4m->pic_h;
 679   c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
 680          ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
 681   memset(_dst, 128, c_sz * 2);
 682 }
 683
 684 /*No conversion function needed.*/
 685 static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
 686                              unsigned char *_aux) {
 687 }
 688
 689 int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
 690                    int only_420) {
 691   char buffer[80];
 692   int  ret;
 693   int  i;
 694   /*Read until newline, or 80 cols, whichever happens first.*/
 695   for (i = 0; i < 79; i++) {
 696     if (_nskip > 0) {
 697       buffer[i] = *_skip++;
 698       _nskip--;
 699     } else {
 700       if (!file_read(buffer + i, 1, _fin)) return -1;
 701     }
 702     if (buffer[i] == '\n')break;
 703   }
 704   /*We skipped too much header data.*/
 705   if (_nskip > 0)return -1;
 706   if (i == 79) {
 707     fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");
 708     return -1;
 709   }
 710   buffer[i] = '\0';
 711   if (memcmp(buffer, "YUV4MPEG", 8)) {
 712     fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
 713     return -1;
 714   }
 715   if (buffer[8] != '2') {
 716     fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
 717   }
 718   ret = y4m_parse_tags(_y4m, buffer + 5);
 719   if (ret < 0) {
 720     fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
 721     return ret;
 722   }
 723   if (_y4m->interlace == '?') {
 724     fprintf(stderr, "Warning: Input video interlacing format unknown; "
 725             "assuming progressive scan.\n");
 726   } else if (_y4m->interlace != 'p') {
 727     fprintf(stderr, "Input video is interlaced; "
 728             "Only progressive scan handled.\n");
 729     return -1;
 730   }
 731   _y4m->vpx_fmt = VPX_IMG_FMT_I420;
 732   _y4m->vpx_bps = 12;
 733   if (strcmp(_y4m->chroma_type, "420") == 0 ||
 734       strcmp(_y4m->chroma_type, "420jpeg") == 0) {
 735     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
 736     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
 737                             + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 738     /*Natively supported: no conversion required.*/
 739     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 740     _y4m->convert = y4m_convert_null;
 741   } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
 742     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
 743     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 744     /*Chroma filter required: read into the aux buf first.*/
 745     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
 746                          2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 747     _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
 748   } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
 749     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
 750     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 751     /*Chroma filter required: read into the aux buf first.
 752       We need to make two filter passes, so we need some extra space in the
 753        aux buffer.*/
 754     _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 755     _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 756     _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
 757   } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
 758     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
 759     _y4m->src_c_dec_v = 1;
 760     _y4m->dst_c_dec_v = 2;
 761     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 762     /*Chroma filter required: read into the aux buf first.*/
 763     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 764     _y4m->convert = y4m_convert_422jpeg_420jpeg;
 765   } else if (strcmp(_y4m->chroma_type, "422") == 0) {
 766     _y4m->src_c_dec_h = 2;
 767     _y4m->src_c_dec_v = 1;
 768     if (only_420) {
 769       _y4m->dst_c_dec_h = 2;
 770       _y4m->dst_c_dec_v = 2;
 771       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 772       /*Chroma filter required: read into the aux buf first.
 773         We need to make two filter passes, so we need some extra space in the
 774          aux buffer.*/
 775       _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 776       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
 777           ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 778       _y4m->convert = y4m_convert_422_420jpeg;
 779     } else {
 780       _y4m->vpx_fmt = VPX_IMG_FMT_I422;
 781       _y4m->vpx_bps = 16;
 782       _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
 783       _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
 784       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
 785                               + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 786       /*Natively supported: no conversion required.*/
 787       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 788       _y4m->convert = y4m_convert_null;
 789       }
 790   } else if (strcmp(_y4m->chroma_type, "411") == 0) {
 791     _y4m->src_c_dec_h = 4;
 792     _y4m->dst_c_dec_h = 2;
 793     _y4m->src_c_dec_v = 1;
 794     _y4m->dst_c_dec_v = 2;
 795     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 796     /*Chroma filter required: read into the aux buf first.
 797       We need to make two filter passes, so we need some extra space in the
 798        aux buffer.*/
 799     _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
 800     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 801     _y4m->convert = y4m_convert_411_420jpeg;
 802   } else if (strcmp(_y4m->chroma_type, "444") == 0) {
 803     _y4m->src_c_dec_h = 1;
 804     _y4m->src_c_dec_v = 1;
 805     if (only_420) {
 806       _y4m->dst_c_dec_h = 2;
 807       _y4m->dst_c_dec_v = 2;
 808       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 809       /*Chroma filter required: read into the aux buf first.
 810         We need to make two filter passes, so we need some extra space in the
 811          aux buffer.*/
 812       _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
 813       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
 814           ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 815       _y4m->convert = y4m_convert_444_420jpeg;
 816     } else {
 817       _y4m->vpx_fmt = VPX_IMG_FMT_I444;
 818       _y4m->vpx_bps = 24;
 819       _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
 820       _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
 821       _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
 822       /*Natively supported: no conversion required.*/
 823       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 824       _y4m->convert = y4m_convert_null;
 825     }
 826   } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
 827     _y4m->src_c_dec_h = 1;
 828     _y4m->src_c_dec_v = 1;
 829     if (only_420) {
 830       _y4m->dst_c_dec_h = 2;
 831       _y4m->dst_c_dec_v = 2;
 832       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 833       /*Chroma filter required: read into the aux buf first.
 834         We need to make two filter passes, so we need some extra space in the
 835          aux buffer.
 836         The extra plane also gets read into the aux buf.
 837         It will be discarded.*/
 838       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
 839       _y4m->convert = y4m_convert_444_420jpeg;
 840     } else {
 841       _y4m->vpx_fmt = VPX_IMG_FMT_444A;
 842       _y4m->vpx_bps = 32;
 843       _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
 844       _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
 845       _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
 846       /*Natively supported: no conversion required.*/
 847       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 848       _y4m->convert = y4m_convert_null;
 849     }
 850   } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
 851     _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
 852     _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
 853     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 854     /*No extra space required, but we need to clear the chroma planes.*/
 855     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 856     _y4m->convert = y4m_convert_mono_420jpeg;
 857   } else {
 858     fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
 859     return -1;
 860   }
 861   /*The size of the final frame buffers is always computed from the
 862      destination chroma decimation type.*/
 863   _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
 864                      + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
 865                      ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
 866   _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
 867   _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
 868   return 0;
 869 }
 870
 871 void y4m_input_close(y4m_input *_y4m) {
 872   free(_y4m->dst_buf);
 873   free(_y4m->aux_buf);
 874 }
 875
 876 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
 877   char frame[6];
 878   int  pic_sz;
 879   int  c_w;
 880   int  c_h;
 881   int  c_sz;
 882   /*Read and skip the frame header.*/
 883   if (!file_read(frame, 6, _fin)) return 0;
 884   if (memcmp(frame, "FRAME", 5)) {
 885     fprintf(stderr, "Loss of framing in Y4M input data\n");
 886     return -1;
 887   }
 888   if (frame[5] != '\n') {
 889     char c;
 890     int  j;
 891     for (j = 0; j < 79 && file_read(&c, 1, _fin) && c != '\n'; j++) {}
 892     if (j == 79) {
 893       fprintf(stderr, "Error parsing Y4M frame header\n");
 894       return -1;
 895     }
 896   }
 897   /*Read the frame data that needs no conversion.*/
 898   if (!file_read(_y4m->dst_buf, _y4m->dst_buf_read_sz, _fin)) {
 899     fprintf(stderr, "Error reading Y4M frame data.\n");
 900     return -1;
 901   }
 902   /*Read the frame data that does need conversion.*/
 903   if (!file_read(_y4m->aux_buf, _y4m->aux_buf_read_sz, _fin)) {
 904     fprintf(stderr, "Error reading Y4M frame data.\n");
 905     return -1;
 906   }
 907   /*Now convert the just read frame.*/
 908   (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
 909   /*Fill in the frame buffer pointers.
 910     We don't use vpx_img_wrap() because it forces padding for odd picture
 911      sizes, which would require a separate fread call for every row.*/
 912   memset(_img, 0, sizeof(*_img));
 913   /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
 914   _img->fmt = _y4m->vpx_fmt;
 915   _img->w = _img->d_w = _y4m->pic_w;
 916   _img->h = _img->d_h = _y4m->pic_h;
 917   _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
 918   _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
 919   _img->bps = _y4m->vpx_bps;
 920
 921   /*Set up the buffer pointers.*/
 922   pic_sz = _y4m->pic_w * _y4m->pic_h;
 923   c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 924   c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 925   c_sz = c_w * c_h;
 926   _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
 927   _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
 928   _img->planes[PLANE_Y] = _y4m->dst_buf;
 929   _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
 930   _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
 931   _img->planes[PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz;
 932   return 1;
 933 }