granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_temporal_filter.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <limits.h>
  13
  14 #include "vp9/common/vp9_alloccommon.h"
  15 #include "vp9/common/vp9_onyxc_int.h"
  16 #include "vp9/common/vp9_quant_common.h"
  17 #include "vp9/common/vp9_reconinter.h"
  18 #include "vp9/encoder/vp9_extend.h"
  19 #include "vp9/encoder/vp9_firstpass.h"
  20 #include "vp9/encoder/vp9_mcomp.h"
  21 #include "vp9/encoder/vp9_encoder.h"
  22 #include "vp9/encoder/vp9_quantize.h"
  23 #include "vp9/encoder/vp9_ratectrl.h"
  24 #include "vp9/encoder/vp9_segmentation.h"
  25 #include "vp9/encoder/vp9_temporal_filter.h"
  26 #include "vpx_dsp/vpx_dsp_common.h"
  27 #include "vpx_mem/vpx_mem.h"
  28 #include "vpx_ports/mem.h"
  29 #include "vpx_ports/vpx_timer.h"
  30 #include "vpx_scale/vpx_scale.h"
  31
// Reciprocal lookup table used to turn a division by a filter weight into a
// multiply and shift. vp9_temporal_filter_init() sets entry i to 0x80000 / i
// (entry 0 is 0); the accumulator normalization in temporal_filter_iterate_c()
// indexes it with the per-pixel weight count and shifts the product right
// by 19.
static int fixed_divide[512];
  33
  34 static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
  35                                             uint8_t *y_mb_ptr,
  36                                             uint8_t *u_mb_ptr,
  37                                             uint8_t *v_mb_ptr,
  38                                             int stride,
  39                                             int uv_block_width,
  40                                             int uv_block_height,
  41                                             int mv_row,
  42                                             int mv_col,
  43                                             uint8_t *pred,
  44                                             struct scale_factors *scale,
  45                                             int x, int y) {
  46   const int which_mv = 0;
  47   const MV mv = { mv_row, mv_col };
  48   const InterpKernel *const kernel =
  49     vp9_filter_kernels[xd->mi[0]->interp_filter];
  50
  51   enum mv_precision mv_precision_uv;
  52   int uv_stride;
  53   if (uv_block_width == 8) {
  54     uv_stride = (stride + 1) >> 1;
  55     mv_precision_uv = MV_PRECISION_Q4;
  56   } else {
  57     uv_stride = stride;
  58     mv_precision_uv = MV_PRECISION_Q3;
  59   }
  60
  61 #if CONFIG_VP9_HIGHBITDEPTH
  62   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  63     vp9_highbd_build_inter_predictor(y_mb_ptr, stride,
  64                                      &pred[0], 16,
  65                                      &mv,
  66                                      scale,
  67                                      16, 16,
  68                                      which_mv,
  69                                      kernel, MV_PRECISION_Q3, x, y, xd->bd);
  70
  71     vp9_highbd_build_inter_predictor(u_mb_ptr, uv_stride,
  72                                      &pred[256], uv_block_width,
  73                                      &mv,
  74                                      scale,
  75                                      uv_block_width, uv_block_height,
  76                                      which_mv,
  77                                      kernel, mv_precision_uv, x, y, xd->bd);
  78
  79     vp9_highbd_build_inter_predictor(v_mb_ptr, uv_stride,
  80                                      &pred[512], uv_block_width,
  81                                      &mv,
  82                                      scale,
  83                                      uv_block_width, uv_block_height,
  84                                      which_mv,
  85                                      kernel, mv_precision_uv, x, y, xd->bd);
  86     return;
  87   }
  88 #endif  // CONFIG_VP9_HIGHBITDEPTH
  89   vp9_build_inter_predictor(y_mb_ptr, stride,
  90                             &pred[0], 16,
  91                             &mv,
  92                             scale,
  93                             16, 16,
  94                             which_mv,
  95                             kernel, MV_PRECISION_Q3, x, y);
  96
  97   vp9_build_inter_predictor(u_mb_ptr, uv_stride,
  98                             &pred[256], uv_block_width,
  99                             &mv,
 100                             scale,
 101                             uv_block_width, uv_block_height,
 102                             which_mv,
 103                             kernel, mv_precision_uv, x, y);
 104
 105   vp9_build_inter_predictor(v_mb_ptr, uv_stride,
 106                             &pred[512], uv_block_width,
 107                             &mv,
 108                             scale,
 109                             uv_block_width, uv_block_height,
 110                             which_mv,
 111                             kernel, mv_precision_uv, x, y);
 112 }
 113
 114 void vp9_temporal_filter_init(void) {
 115   int i;
 116
 117   fixed_divide[0] = 0;
 118   for (i = 1; i < 512; ++i)
 119     fixed_divide[i] = 0x80000 / i;
 120 }
 121
 122 void vp9_temporal_filter_apply_c(uint8_t *frame1,
 123                                  unsigned int stride,
 124                                  uint8_t *frame2,
 125                                  unsigned int block_width,
 126                                  unsigned int block_height,
 127                                  int strength,
 128                                  int filter_weight,
 129                                  unsigned int *accumulator,
 130                                  uint16_t *count) {
 131   unsigned int i, j, k;
 132   int modifier;
 133   int byte = 0;
 134   const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
 135
 136   for (i = 0, k = 0; i < block_height; i++) {
 137     for (j = 0; j < block_width; j++, k++) {
 138       int pixel_value = *frame2;
 139
 140       // non-local mean approach
 141       int diff_sse[9] = { 0 };
 142       int idx, idy, index = 0;
 143
 144       for (idy = -1; idy <= 1; ++idy) {
 145         for (idx = -1; idx <= 1; ++idx) {
 146           int row = i + idy;
 147           int col = j + idx;
 148
 149           if (row >= 0 && row < (int)block_height &&
 150               col >= 0 && col < (int)block_width) {
 151             int diff = frame1[byte + idy * (int)stride + idx] -
 152                 frame2[idy * (int)block_width + idx];
 153             diff_sse[index] = diff * diff;
 154             ++index;
 155           }
 156         }
 157       }
 158
 159       assert(index > 0);
 160
 161       modifier = 0;
 162       for (idx = 0; idx < 9; ++idx)
 163         modifier += diff_sse[idx];
 164
 165       modifier *= 3;
 166       modifier /= index;
 167
 168       ++frame2;
 169
 170       modifier  += rounding;
 171       modifier >>= strength;
 172
 173       if (modifier > 16)
 174         modifier = 16;
 175
 176       modifier = 16 - modifier;
 177       modifier *= filter_weight;
 178
 179       count[k] += modifier;
 180       accumulator[k] += modifier * pixel_value;
 181
 182       byte++;
 183     }
 184
 185     byte += stride - block_width;
 186   }
 187 }
 188
 189 #if CONFIG_VP9_HIGHBITDEPTH
 190 void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
 191                                         unsigned int stride,
 192                                         uint8_t *frame2_8,
 193                                         unsigned int block_width,
 194                                         unsigned int block_height,
 195                                         int strength,
 196                                         int filter_weight,
 197                                         unsigned int *accumulator,
 198                                         uint16_t *count) {
 199   uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
 200   uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
 201   unsigned int i, j, k;
 202   int modifier;
 203   int byte = 0;
 204   const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
 205
 206   for (i = 0, k = 0; i < block_height; i++) {
 207     for (j = 0; j < block_width; j++, k++) {
 208       int pixel_value = *frame2;
 209       int diff_sse[9] = { 0 };
 210       int idx, idy, index = 0;
 211
 212       for (idy = -1; idy <= 1; ++idy) {
 213         for (idx = -1; idx <= 1; ++idx) {
 214           int row = i + idy;
 215           int col = j + idx;
 216
 217           if (row >= 0 && row < (int)block_height &&
 218               col >= 0 && col < (int)block_width) {
 219             int diff = frame1[byte + idy * (int)stride + idx] -
 220                 frame2[idy * (int)block_width + idx];
 221             diff_sse[index] = diff * diff;
 222             ++index;
 223           }
 224         }
 225       }
 226       assert(index > 0);
 227
 228       modifier = 0;
 229       for (idx = 0; idx < 9; ++idx)
 230         modifier += diff_sse[idx];
 231
 232       modifier *= 3;
 233       modifier /= index;
 234
 235       ++frame2;
 236       modifier += rounding;
 237       modifier >>= strength;
 238
 239       if (modifier > 16)
 240         modifier = 16;
 241
 242       modifier = 16 - modifier;
 243       modifier *= filter_weight;
 244
 245       count[k] += modifier;
 246       accumulator[k] += modifier * pixel_value;
 247
 248       byte++;
 249     }
 250
 251     byte += stride - block_width;
 252   }
 253 }
 254 #endif  // CONFIG_VP9_HIGHBITDEPTH
 255
 256 static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
 257                                               uint8_t *arf_frame_buf,
 258                                               uint8_t *frame_ptr_buf,
 259                                               int stride) {
 260   MACROBLOCK *const x = &cpi->td.mb;
 261   MACROBLOCKD *const xd = &x->e_mbd;
 262   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
 263   const SEARCH_METHODS old_search_method = mv_sf->search_method;
 264   int step_param;
 265   int sadpb = x->sadperbit16;
 266   int bestsme = INT_MAX;
 267   int distortion;
 268   unsigned int sse;
 269   int cost_list[5];
 270
 271   MV best_ref_mv1 = {0, 0};
 272   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
 273   MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv;
 274
 275   // Save input state
 276   struct buf_2d src = x->plane[0].src;
 277   struct buf_2d pre = xd->plane[0].pre[0];
 278
 279   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
 280   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
 281
 282   // Setup frame pointers
 283   x->plane[0].src.buf = arf_frame_buf;
 284   x->plane[0].src.stride = stride;
 285   xd->plane[0].pre[0].buf = frame_ptr_buf;
 286   xd->plane[0].pre[0].stride = stride;
 287
 288   step_param = mv_sf->reduce_first_step_size;
 289   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
 290
 291   mv_sf->search_method = HEX;
 292   vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
 293                         sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1,
 294                         ref_mv, 0, 0);
 295   mv_sf->search_method = old_search_method;
 296
 297   // Ignore mv costing by sending NULL pointer instead of cost array
 298   bestsme = cpi->find_fractional_mv_step(x, ref_mv,
 299                                          &best_ref_mv1,
 300                                          cpi->common.allow_high_precision_mv,
 301                                          x->errorperbit,
 302                                          &cpi->fn_ptr[BLOCK_16X16],
 303                                          0, mv_sf->subpel_iters_per_step,
 304                                          cond_cost_list(cpi, cost_list),
 305                                          NULL, NULL,
 306                                          &distortion, &sse, NULL, 0, 0);
 307
 308   // Restore input state
 309   x->plane[0].src = src;
 310   xd->plane[0].pre[0] = pre;
 311
 312   return bestsme;
 313 }
 314
 315 static void temporal_filter_iterate_c(VP9_COMP *cpi,
 316                                       YV12_BUFFER_CONFIG **frames,
 317                                       int frame_count,
 318                                       int alt_ref_index,
 319                                       int strength,
 320                                       struct scale_factors *scale) {
 321   int byte;
 322   int frame;
 323   int mb_col, mb_row;
 324   unsigned int filter_weight;
 325   int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
 326   int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
 327   int mb_y_offset = 0;
 328   int mb_uv_offset = 0;
 329   DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
 330   DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
 331   MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
 332   YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
 333   uint8_t *dst1, *dst2;
 334 #if CONFIG_VP9_HIGHBITDEPTH
 335   DECLARE_ALIGNED(16, uint16_t,  predictor16[16 * 16 * 3]);
 336   DECLARE_ALIGNED(16, uint8_t,  predictor8[16 * 16 * 3]);
 337   uint8_t *predictor;
 338 #else
 339   DECLARE_ALIGNED(16, uint8_t,  predictor[16 * 16 * 3]);
 340 #endif
 341   const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
 342   const int mb_uv_width  = 16 >> mbd->plane[1].subsampling_x;
 343
 344   // Save input state
 345   uint8_t* input_buffer[MAX_MB_PLANE];
 346   int i;
 347 #if CONFIG_VP9_HIGHBITDEPTH
 348   if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 349     predictor = CONVERT_TO_BYTEPTR(predictor16);
 350   } else {
 351     predictor = predictor8;
 352   }
 353 #endif
 354
 355   for (i = 0; i < MAX_MB_PLANE; i++)
 356     input_buffer[i] = mbd->plane[i].pre[0].buf;
 357
 358   for (mb_row = 0; mb_row < mb_rows; mb_row++) {
 359     // Source frames are extended to 16 pixels. This is different than
 360     //  L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
 361     // A 6/8 tap filter is used for motion search.  This requires 2 pixels
 362     //  before and 3 pixels after.  So the largest Y mv on a border would
 363     //  then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
 364     //  Y and therefore only extended by 8.  The largest mv that a UV block
 365     //  can support is 8 - VP9_INTERP_EXTEND.  A UV mv is half of a Y mv.
 366     //  (16 - VP9_INTERP_EXTEND) >> 1 which is greater than
 367     //  8 - VP9_INTERP_EXTEND.
 368     // To keep the mv in play for both Y and UV planes the max that it
 369     //  can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
 370     cpi->td.mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
 371     cpi->td.mb.mv_row_max = ((mb_rows - 1 - mb_row) * 16)
 372                          + (17 - 2 * VP9_INTERP_EXTEND);
 373
 374     for (mb_col = 0; mb_col < mb_cols; mb_col++) {
 375       int i, j, k;
 376       int stride;
 377
 378       memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
 379       memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
 380
 381       cpi->td.mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
 382       cpi->td.mb.mv_col_max = ((mb_cols - 1 - mb_col) * 16)
 383                            + (17 - 2 * VP9_INTERP_EXTEND);
 384
 385       for (frame = 0; frame < frame_count; frame++) {
 386         const int thresh_low  = 10000;
 387         const int thresh_high = 20000;
 388
 389         if (frames[frame] == NULL)
 390           continue;
 391
 392         mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0;
 393         mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0;
 394
 395         if (frame == alt_ref_index) {
 396           filter_weight = 2;
 397         } else {
 398           // Find best match in this frame by MC
 399           int err = temporal_filter_find_matching_mb_c(cpi,
 400               frames[alt_ref_index]->y_buffer + mb_y_offset,
 401               frames[frame]->y_buffer + mb_y_offset,
 402               frames[frame]->y_stride);
 403
 404           // Assign higher weight to matching MB if it's error
 405           // score is lower. If not applying MC default behavior
 406           // is to weight all MBs equal.
 407           filter_weight = err < thresh_low
 408                           ? 2 : err < thresh_high ? 1 : 0;
 409         }
 410
 411         if (filter_weight != 0) {
 412           // Construct the predictors
 413           temporal_filter_predictors_mb_c(mbd,
 414               frames[frame]->y_buffer + mb_y_offset,
 415               frames[frame]->u_buffer + mb_uv_offset,
 416               frames[frame]->v_buffer + mb_uv_offset,
 417               frames[frame]->y_stride,
 418               mb_uv_width, mb_uv_height,
 419               mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
 420               mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
 421               predictor, scale,
 422               mb_col * 16, mb_row * 16);
 423
 424 #if CONFIG_VP9_HIGHBITDEPTH
 425           if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 426             int adj_strength = strength + 2 * (mbd->bd - 8);
 427             // Apply the filter (YUV)
 428             vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset,
 429                                                f->y_stride,
 430                                                predictor, 16, 16, adj_strength,
 431                                                filter_weight,
 432                                                accumulator, count);
 433             vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
 434                                                f->uv_stride, predictor + 256,
 435                                                mb_uv_width, mb_uv_height,
 436                                                adj_strength,
 437                                                filter_weight, accumulator + 256,
 438                                                count + 256);
 439             vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
 440                                                f->uv_stride, predictor + 512,
 441                                                mb_uv_width, mb_uv_height,
 442                                                adj_strength, filter_weight,
 443                                                accumulator + 512, count + 512);
 444           } else {
 445             // Apply the filter (YUV)
 446             vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
 447                                         predictor, 16, 16,
 448                                         strength, filter_weight,
 449                                         accumulator, count);
 450             vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
 451                                         f->uv_stride,
 452                                         predictor + 256,
 453                                         mb_uv_width, mb_uv_height, strength,
 454                                         filter_weight, accumulator + 256,
 455                                         count + 256);
 456             vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
 457                                         f->uv_stride,
 458                                         predictor + 512,
 459                                         mb_uv_width, mb_uv_height, strength,
 460                                         filter_weight, accumulator + 512,
 461                                         count + 512);
 462           }
 463 #else
 464           // Apply the filter (YUV)
 465           // TODO(jingning): Need SIMD optimization for this.
 466           vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
 467                                       predictor, 16, 16,
 468                                       strength, filter_weight,
 469                                       accumulator, count);
 470           vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
 471                                       predictor + 256,
 472                                       mb_uv_width, mb_uv_height, strength,
 473                                       filter_weight, accumulator + 256,
 474                                       count + 256);
 475           vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
 476                                       predictor + 512,
 477                                       mb_uv_width, mb_uv_height, strength,
 478                                       filter_weight, accumulator + 512,
 479                                       count + 512);
 480 #endif  // CONFIG_VP9_HIGHBITDEPTH
 481         }
 482       }
 483
 484 #if CONFIG_VP9_HIGHBITDEPTH
 485       if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 486         uint16_t *dst1_16;
 487         uint16_t *dst2_16;
 488         // Normalize filter output to produce AltRef frame
 489         dst1 = cpi->alt_ref_buffer.y_buffer;
 490         dst1_16 = CONVERT_TO_SHORTPTR(dst1);
 491         stride = cpi->alt_ref_buffer.y_stride;
 492         byte = mb_y_offset;
 493         for (i = 0, k = 0; i < 16; i++) {
 494           for (j = 0; j < 16; j++, k++) {
 495             unsigned int pval = accumulator[k] + (count[k] >> 1);
 496             pval *= fixed_divide[count[k]];
 497             pval >>= 19;
 498
 499             dst1_16[byte] = (uint16_t)pval;
 500
 501             // move to next pixel
 502             byte++;
 503           }
 504
 505           byte += stride - 16;
 506         }
 507
 508         dst1 = cpi->alt_ref_buffer.u_buffer;
 509         dst2 = cpi->alt_ref_buffer.v_buffer;
 510         dst1_16 = CONVERT_TO_SHORTPTR(dst1);
 511         dst2_16 = CONVERT_TO_SHORTPTR(dst2);
 512         stride = cpi->alt_ref_buffer.uv_stride;
 513         byte = mb_uv_offset;
 514         for (i = 0, k = 256; i < mb_uv_height; i++) {
 515           for (j = 0; j < mb_uv_width; j++, k++) {
 516             int m = k + 256;
 517
 518             // U
 519             unsigned int pval = accumulator[k] + (count[k] >> 1);
 520             pval *= fixed_divide[count[k]];
 521             pval >>= 19;
 522             dst1_16[byte] = (uint16_t)pval;
 523
 524             // V
 525             pval = accumulator[m] + (count[m] >> 1);
 526             pval *= fixed_divide[count[m]];
 527             pval >>= 19;
 528             dst2_16[byte] = (uint16_t)pval;
 529
 530             // move to next pixel
 531             byte++;
 532           }
 533
 534           byte += stride - mb_uv_width;
 535         }
 536       } else {
 537         // Normalize filter output to produce AltRef frame
 538         dst1 = cpi->alt_ref_buffer.y_buffer;
 539         stride = cpi->alt_ref_buffer.y_stride;
 540         byte = mb_y_offset;
 541         for (i = 0, k = 0; i < 16; i++) {
 542           for (j = 0; j < 16; j++, k++) {
 543             unsigned int pval = accumulator[k] + (count[k] >> 1);
 544             pval *= fixed_divide[count[k]];
 545             pval >>= 19;
 546
 547             dst1[byte] = (uint8_t)pval;
 548
 549             // move to next pixel
 550             byte++;
 551           }
 552           byte += stride - 16;
 553         }
 554
 555         dst1 = cpi->alt_ref_buffer.u_buffer;
 556         dst2 = cpi->alt_ref_buffer.v_buffer;
 557         stride = cpi->alt_ref_buffer.uv_stride;
 558         byte = mb_uv_offset;
 559         for (i = 0, k = 256; i < mb_uv_height; i++) {
 560           for (j = 0; j < mb_uv_width; j++, k++) {
 561             int m = k + 256;
 562
 563             // U
 564             unsigned int pval = accumulator[k] + (count[k] >> 1);
 565             pval *= fixed_divide[count[k]];
 566             pval >>= 19;
 567             dst1[byte] = (uint8_t)pval;
 568
 569             // V
 570             pval = accumulator[m] + (count[m] >> 1);
 571             pval *= fixed_divide[count[m]];
 572             pval >>= 19;
 573             dst2[byte] = (uint8_t)pval;
 574
 575             // move to next pixel
 576             byte++;
 577           }
 578           byte += stride - mb_uv_width;
 579         }
 580       }
 581 #else
 582       // Normalize filter output to produce AltRef frame
 583       dst1 = cpi->alt_ref_buffer.y_buffer;
 584       stride = cpi->alt_ref_buffer.y_stride;
 585       byte = mb_y_offset;
 586       for (i = 0, k = 0; i < 16; i++) {
 587         for (j = 0; j < 16; j++, k++) {
 588           unsigned int pval = accumulator[k] + (count[k] >> 1);
 589           pval *= fixed_divide[count[k]];
 590           pval >>= 19;
 591
 592           dst1[byte] = (uint8_t)pval;
 593
 594           // move to next pixel
 595           byte++;
 596         }
 597         byte += stride - 16;
 598       }
 599
 600       dst1 = cpi->alt_ref_buffer.u_buffer;
 601       dst2 = cpi->alt_ref_buffer.v_buffer;
 602       stride = cpi->alt_ref_buffer.uv_stride;
 603       byte = mb_uv_offset;
 604       for (i = 0, k = 256; i < mb_uv_height; i++) {
 605         for (j = 0; j < mb_uv_width; j++, k++) {
 606           int m = k + 256;
 607
 608           // U
 609           unsigned int pval = accumulator[k] + (count[k] >> 1);
 610           pval *= fixed_divide[count[k]];
 611           pval >>= 19;
 612           dst1[byte] = (uint8_t)pval;
 613
 614           // V
 615           pval = accumulator[m] + (count[m] >> 1);
 616           pval *= fixed_divide[count[m]];
 617           pval >>= 19;
 618           dst2[byte] = (uint8_t)pval;
 619
 620           // move to next pixel
 621           byte++;
 622         }
 623         byte += stride - mb_uv_width;
 624       }
 625 #endif  // CONFIG_VP9_HIGHBITDEPTH
 626       mb_y_offset += 16;
 627       mb_uv_offset += mb_uv_width;
 628     }
 629     mb_y_offset += 16 * (f->y_stride - mb_cols);
 630     mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
 631   }
 632
 633   // Restore input state
 634   for (i = 0; i < MAX_MB_PLANE; i++)
 635     mbd->plane[i].pre[0].buf = input_buffer[i];
 636 }
 637
 638 // Apply buffer limits and context specific adjustments to arnr filter.
 639 static void adjust_arnr_filter(VP9_COMP *cpi,
 640                                int distance, int group_boost,
 641                                int *arnr_frames, int *arnr_strength) {
 642   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 643   const int frames_after_arf =
 644       vp9_lookahead_depth(cpi->lookahead) - distance - 1;
 645   int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
 646   int frames_bwd;
 647   int q, frames, strength;
 648
 649   // Define the forward and backwards filter limits for this arnr group.
 650   if (frames_fwd > frames_after_arf)
 651     frames_fwd = frames_after_arf;
 652   if (frames_fwd > distance)
 653     frames_fwd = distance;
 654
 655   frames_bwd = frames_fwd;
 656
 657   // For even length filter there is one more frame backward
 658   // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
 659   if (frames_bwd < distance)
 660     frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
 661
 662   // Set the baseline active filter size.
 663   frames = frames_bwd + 1 + frames_fwd;
 664
 665   // Adjust the strength based on active max q.
 666   if (cpi->common.current_video_frame > 1)
 667     q = ((int)vp9_convert_qindex_to_q(
 668         cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth));
 669   else
 670     q = ((int)vp9_convert_qindex_to_q(
 671         cpi->rc.avg_frame_qindex[KEY_FRAME], cpi->common.bit_depth));
 672   if (q > 16) {
 673     strength = oxcf->arnr_strength;
 674   } else {
 675     strength = oxcf->arnr_strength - ((16 - q) / 2);
 676     if (strength < 0)
 677       strength = 0;
 678   }
 679
 680   // Adjust number of frames in filter and strength based on gf boost level.
 681   if (frames > group_boost / 150) {
 682     frames = group_boost / 150;
 683     frames += !(frames & 1);
 684   }
 685
 686   if (strength > group_boost / 300) {
 687     strength = group_boost / 300;
 688   }
 689
 690   // Adjustments for second level arf in multi arf case.
 691   if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
 692     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
 693     if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
 694       strength >>= 1;
 695     }
 696   }
 697
 698   *arnr_frames = frames;
 699   *arnr_strength = strength;
 700 }
 701
 702 void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
 703   VP9_COMMON *const cm = &cpi->common;
 704   RATE_CONTROL *const rc = &cpi->rc;
 705   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
 706   int frame;
 707   int frames_to_blur;
 708   int start_frame;
 709   int strength;
 710   int frames_to_blur_backward;
 711   int frames_to_blur_forward;
 712   struct scale_factors sf;
 713   YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = {NULL};
 714
 715   // Apply context specific adjustments to the arnr filter parameters.
 716   adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
 717   frames_to_blur_backward = (frames_to_blur / 2);
 718   frames_to_blur_forward = ((frames_to_blur - 1) / 2);
 719   start_frame = distance + frames_to_blur_forward;
 720
 721   // Setup frame pointers, NULL indicates frame not included in filter.
 722   for (frame = 0; frame < frames_to_blur; ++frame) {
 723     const int which_buffer = start_frame - frame;
 724     struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
 725                                                      which_buffer);
 726     frames[frames_to_blur - 1 - frame] = &buf->img;
 727   }
 728
 729   if (frames_to_blur > 0) {
 730     // Setup scaling factors. Scaling on each of the arnr frames is not
 731     // supported.
 732     if (cpi->use_svc) {
 733       // In spatial svc the scaling factors might be less then 1/2.
 734       // So we will use non-normative scaling.
 735       int frame_used = 0;
 736 #if CONFIG_VP9_HIGHBITDEPTH
 737       vp9_setup_scale_factors_for_frame(
 738           &sf,
 739           get_frame_new_buffer(cm)->y_crop_width,
 740           get_frame_new_buffer(cm)->y_crop_height,
 741           get_frame_new_buffer(cm)->y_crop_width,
 742           get_frame_new_buffer(cm)->y_crop_height,
 743           cm->use_highbitdepth);
 744 #else
 745       vp9_setup_scale_factors_for_frame(
 746           &sf,
 747           get_frame_new_buffer(cm)->y_crop_width,
 748           get_frame_new_buffer(cm)->y_crop_height,
 749           get_frame_new_buffer(cm)->y_crop_width,
 750           get_frame_new_buffer(cm)->y_crop_height);
 751 #endif  // CONFIG_VP9_HIGHBITDEPTH
 752
 753       for (frame = 0; frame < frames_to_blur; ++frame) {
 754         if (cm->mi_cols * MI_SIZE != frames[frame]->y_width ||
 755             cm->mi_rows * MI_SIZE != frames[frame]->y_height) {
 756           if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used],
 757                                        cm->width, cm->height,
 758                                        cm->subsampling_x, cm->subsampling_y,
 759 #if CONFIG_VP9_HIGHBITDEPTH
 760                                        cm->use_highbitdepth,
 761 #endif
 762                                        VP9_ENC_BORDER_IN_PIXELS,
 763                                        cm->byte_alignment,
 764                                        NULL, NULL, NULL)) {
 765             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
 766                                "Failed to reallocate alt_ref_buffer");
 767           }
 768           frames[frame] = vp9_scale_if_required(
 769               cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0);
 770           ++frame_used;
 771         }
 772       }
 773       cm->mi = cm->mip + cm->mi_stride + 1;
 774       xd->mi = cm->mi_grid_visible;
 775       xd->mi[0] = cm->mi;
 776     } else {
 777       // ARF is produced at the native frame size and resized when coded.
 778 #if CONFIG_VP9_HIGHBITDEPTH
 779       vp9_setup_scale_factors_for_frame(&sf,
 780                                         frames[0]->y_crop_width,
 781                                         frames[0]->y_crop_height,
 782                                         frames[0]->y_crop_width,
 783                                         frames[0]->y_crop_height,
 784                                         cm->use_highbitdepth);
 785 #else
 786       vp9_setup_scale_factors_for_frame(&sf,
 787                                         frames[0]->y_crop_width,
 788                                         frames[0]->y_crop_height,
 789                                         frames[0]->y_crop_width,
 790                                         frames[0]->y_crop_height);
 791 #endif  // CONFIG_VP9_HIGHBITDEPTH
 792     }
 793   }
 794
 795   temporal_filter_iterate_c(cpi, frames, frames_to_blur,
 796                             frames_to_blur_backward, strength, &sf);
 797 }