granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_temporal_filter.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <limits.h>
  13
  14 #include "vp9/common/vp9_onyxc_int.h"
  15 #include "vp9/common/vp9_reconinter.h"
  16 #include "vp9/encoder/vp9_onyx_int.h"
  17 #include "vp9/common/vp9_systemdependent.h"
  18 #include "vp9/encoder/vp9_quantize.h"
  19 #include "vp9/common/vp9_alloccommon.h"
  20 #include "vp9/encoder/vp9_mcomp.h"
  21 #include "vp9/encoder/vp9_firstpass.h"
  22 #include "vp9/encoder/vp9_psnr.h"
  23 #include "vpx_scale/vpx_scale.h"
  24 #include "vp9/common/vp9_extend.h"
  25 #include "vp9/encoder/vp9_ratectrl.h"
  26 #include "vp9/common/vp9_quant_common.h"
  27 #include "vp9/encoder/vp9_segmentation.h"
  28 #include "vpx_mem/vpx_mem.h"
  29 #include "vpx_ports/vpx_timer.h"
  30
  31 #define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
  32 #define ALT_REF_SUBPEL_ENABLED 1  // dis/enable subpel in MC AltRef filtering
  33
  34 static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
  35                                             uint8_t *y_mb_ptr,
  36                                             uint8_t *u_mb_ptr,
  37                                             uint8_t *v_mb_ptr,
  38                                             int stride,
  39                                             int mv_row,
  40                                             int mv_col,
  41                                             uint8_t *pred,
  42                                             struct scale_factors *scale) {
  43   const int which_mv = 0;
  44   MV mv = { mv_row, mv_col };
  45
  46   vp9_build_inter_predictor(y_mb_ptr, stride,
  47                             &pred[0], 16,
  48                             &mv,
  49                             scale,
  50                             16, 16,
  51                             which_mv,
  52                             &xd->subpix, MV_PRECISION_Q3);
  53
  54   stride = (stride + 1) >> 1;
  55
  56   vp9_build_inter_predictor(u_mb_ptr, stride,
  57                             &pred[256], 8,
  58                             &mv,
  59                             scale,
  60                             8, 8,
  61                             which_mv,
  62                             &xd->subpix, MV_PRECISION_Q4);
  63
  64   vp9_build_inter_predictor(v_mb_ptr, stride,
  65                             &pred[320], 8,
  66                             &mv,
  67                             scale,
  68                             8, 8,
  69                             which_mv,
  70                             &xd->subpix, MV_PRECISION_Q4);
  71 }
  72
  73 void vp9_temporal_filter_apply_c(uint8_t *frame1,
  74                                  unsigned int stride,
  75                                  uint8_t *frame2,
  76                                  unsigned int block_size,
  77                                  int strength,
  78                                  int filter_weight,
  79                                  unsigned int *accumulator,
  80                                  uint16_t *count) {
  81   unsigned int i, j, k;
  82   int modifier;
  83   int byte = 0;
  84
  85   for (i = 0, k = 0; i < block_size; i++) {
  86     for (j = 0; j < block_size; j++, k++) {
  87       int src_byte = frame1[byte];
  88       int pixel_value = *frame2++;
  89
  90       modifier   = src_byte - pixel_value;
  91       // This is an integer approximation of:
  92       // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
  93       // modifier =  (int)roundf(coeff > 16 ? 0 : 16-coeff);
  94       modifier  *= modifier;
  95       modifier  *= 3;
  96       modifier  += 1 << (strength - 1);
  97       modifier >>= strength;
  98
  99       if (modifier > 16)
 100         modifier = 16;
 101
 102       modifier = 16 - modifier;
 103       modifier *= filter_weight;
 104
 105       count[k] += modifier;
 106       accumulator[k] += modifier * pixel_value;
 107
 108       byte++;
 109     }
 110
 111     byte += stride - block_size;
 112   }
 113 }
 114
 115 #if ALT_REF_MC_ENABLED
 116
 117 static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
 118                                               uint8_t *arf_frame_buf,
 119                                               uint8_t *frame_ptr_buf,
 120                                               int stride,
 121                                               int error_thresh) {
 122   MACROBLOCK *x = &cpi->mb;
 123   MACROBLOCKD* const xd = &x->e_mbd;
 124   int step_param;
 125   int sadpb = x->sadperbit16;
 126   int bestsme = INT_MAX;
 127
 128   int_mv best_ref_mv1;
 129   int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
 130   int_mv *ref_mv;
 131
 132   // Save input state
 133   struct buf_2d src = x->plane[0].src;
 134   struct buf_2d pre = xd->plane[0].pre[0];
 135
 136   best_ref_mv1.as_int = 0;
 137   best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3;
 138   best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;
 139
 140   // Setup frame pointers
 141   x->plane[0].src.buf = arf_frame_buf;
 142   x->plane[0].src.stride = stride;
 143   xd->plane[0].pre[0].buf = frame_ptr_buf;
 144   xd->plane[0].pre[0].stride = stride;
 145
 146   // Further step/diamond searches as necessary
 147   if (cpi->speed < 8)
 148     step_param = cpi->sf.reduce_first_step_size + ((cpi->speed > 5) ? 1 : 0);
 149   else
 150     step_param = cpi->sf.reduce_first_step_size + 2;
 151   step_param = MIN(step_param, (cpi->sf.max_step_search_steps - 2));
 152
 153   /*cpi->sf.search_method == HEX*/
 154   // Ignore mv costing by sending NULL pointer instead of cost arrays
 155   ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0];
 156   bestsme = vp9_hex_search(x, &best_ref_mv1_full.as_mv,
 157                            step_param, sadpb, 1,
 158                            &cpi->fn_ptr[BLOCK_16X16],
 159                            0, &best_ref_mv1.as_mv, &ref_mv->as_mv);
 160
 161 #if ALT_REF_SUBPEL_ENABLED
 162   // Try sub-pixel MC?
 163   // if (bestsme > error_thresh && bestsme < INT_MAX)
 164   {
 165     int distortion;
 166     unsigned int sse;
 167     // Ignore mv costing by sending NULL pointer instead of cost array
 168     bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv,
 169                                            &best_ref_mv1.as_mv,
 170                                            cpi->common.allow_high_precision_mv,
 171                                            x->errorperbit,
 172                                            &cpi->fn_ptr[BLOCK_16X16],
 173                                            0, cpi->sf.subpel_iters_per_step,
 174                                            NULL, NULL,
 175                                            &distortion, &sse);
 176   }
 177 #endif
 178
 179   // Restore input state
 180   x->plane[0].src = src;
 181   xd->plane[0].pre[0] = pre;
 182
 183   return bestsme;
 184 }
 185 #endif
 186
 187 static void temporal_filter_iterate_c(VP9_COMP *cpi,
 188                                       int frame_count,
 189                                       int alt_ref_index,
 190                                       int strength,
 191                                       struct scale_factors *scale) {
 192   int byte;
 193   int frame;
 194   int mb_col, mb_row;
 195   unsigned int filter_weight;
 196   int mb_cols = cpi->common.mb_cols;
 197   int mb_rows = cpi->common.mb_rows;
 198   int mb_y_offset = 0;
 199   int mb_uv_offset = 0;
 200   DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 + 8 * 8 + 8 * 8);
 201   DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 + 8 * 8 + 8 * 8);
 202   MACROBLOCKD *mbd = &cpi->mb.e_mbd;
 203   YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
 204   uint8_t *dst1, *dst2;
 205   DECLARE_ALIGNED_ARRAY(16, uint8_t,  predictor, 16 * 16 + 8 * 8 + 8 * 8);
 206
 207   // Save input state
 208   uint8_t* input_buffer[MAX_MB_PLANE];
 209   int i;
 210
 211   for (i = 0; i < MAX_MB_PLANE; i++)
 212     input_buffer[i] = mbd->plane[i].pre[0].buf;
 213
 214   for (mb_row = 0; mb_row < mb_rows; mb_row++) {
 215 #if ALT_REF_MC_ENABLED
 216     // Source frames are extended to 16 pixels.  This is different than
 217     //  L/A/G reference frames that have a border of 32 (VP9BORDERINPIXELS)
 218     // A 6/8 tap filter is used for motion search.  This requires 2 pixels
 219     //  before and 3 pixels after.  So the largest Y mv on a border would
 220     //  then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
 221     //  Y and therefore only extended by 8.  The largest mv that a UV block
 222     //  can support is 8 - VP9_INTERP_EXTEND.  A UV mv is half of a Y mv.
 223     //  (16 - VP9_INTERP_EXTEND) >> 1 which is greater than
 224     //  8 - VP9_INTERP_EXTEND.
 225     // To keep the mv in play for both Y and UV planes the max that it
 226     //  can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
 227     cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
 228     cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
 229                          + (17 - 2 * VP9_INTERP_EXTEND);
 230 #endif
 231
 232     for (mb_col = 0; mb_col < mb_cols; mb_col++) {
 233       int i, j, k;
 234       int stride;
 235
 236       vpx_memset(accumulator, 0, 384 * sizeof(unsigned int));
 237       vpx_memset(count, 0, 384 * sizeof(uint16_t));
 238
 239 #if ALT_REF_MC_ENABLED
 240       cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
 241       cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
 242                            + (17 - 2 * VP9_INTERP_EXTEND);
 243 #endif
 244
 245       for (frame = 0; frame < frame_count; frame++) {
 246         if (cpi->frames[frame] == NULL)
 247           continue;
 248
 249         mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row = 0;
 250         mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col = 0;
 251
 252         if (frame == alt_ref_index) {
 253           filter_weight = 2;
 254         } else {
 255           int err = 0;
 256 #if ALT_REF_MC_ENABLED
 257 #define THRESH_LOW   10000
 258 #define THRESH_HIGH  20000
 259
 260           // Find best match in this frame by MC
 261           err = temporal_filter_find_matching_mb_c
 262                 (cpi,
 263                  cpi->frames[alt_ref_index]->y_buffer + mb_y_offset,
 264                  cpi->frames[frame]->y_buffer + mb_y_offset,
 265                  cpi->frames[frame]->y_stride,
 266                  THRESH_LOW);
 267 #endif
 268           // Assign higher weight to matching MB if it's error
 269           // score is lower. If not applying MC default behavior
 270           // is to weight all MBs equal.
 271           filter_weight = err < THRESH_LOW
 272                           ? 2 : err < THRESH_HIGH ? 1 : 0;
 273         }
 274
 275         if (filter_weight != 0) {
 276           // Construct the predictors
 277           temporal_filter_predictors_mb_c
 278           (mbd,
 279            cpi->frames[frame]->y_buffer + mb_y_offset,
 280            cpi->frames[frame]->u_buffer + mb_uv_offset,
 281            cpi->frames[frame]->v_buffer + mb_uv_offset,
 282            cpi->frames[frame]->y_stride,
 283            mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row,
 284            mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col,
 285            predictor, scale);
 286
 287           // Apply the filter (YUV)
 288           vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
 289                                     predictor, 16, strength, filter_weight,
 290                                     accumulator, count);
 291
 292           vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
 293                                     predictor + 256, 8, strength, filter_weight,
 294                                     accumulator + 256, count + 256);
 295
 296           vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
 297                                     predictor + 320, 8, strength, filter_weight,
 298                                     accumulator + 320, count + 320);
 299         }
 300       }
 301
 302       // Normalize filter output to produce AltRef frame
 303       dst1 = cpi->alt_ref_buffer.y_buffer;
 304       stride = cpi->alt_ref_buffer.y_stride;
 305       byte = mb_y_offset;
 306       for (i = 0, k = 0; i < 16; i++) {
 307         for (j = 0; j < 16; j++, k++) {
 308           unsigned int pval = accumulator[k] + (count[k] >> 1);
 309           pval *= cpi->fixed_divide[count[k]];
 310           pval >>= 19;
 311
 312           dst1[byte] = (uint8_t)pval;
 313
 314           // move to next pixel
 315           byte++;
 316         }
 317
 318         byte += stride - 16;
 319       }
 320
 321       dst1 = cpi->alt_ref_buffer.u_buffer;
 322       dst2 = cpi->alt_ref_buffer.v_buffer;
 323       stride = cpi->alt_ref_buffer.uv_stride;
 324       byte = mb_uv_offset;
 325       for (i = 0, k = 256; i < 8; i++) {
 326         for (j = 0; j < 8; j++, k++) {
 327           int m = k + 64;
 328
 329           // U
 330           unsigned int pval = accumulator[k] + (count[k] >> 1);
 331           pval *= cpi->fixed_divide[count[k]];
 332           pval >>= 19;
 333           dst1[byte] = (uint8_t)pval;
 334
 335           // V
 336           pval = accumulator[m] + (count[m] >> 1);
 337           pval *= cpi->fixed_divide[count[m]];
 338           pval >>= 19;
 339           dst2[byte] = (uint8_t)pval;
 340
 341           // move to next pixel
 342           byte++;
 343         }
 344
 345         byte += stride - 8;
 346       }
 347
 348       mb_y_offset += 16;
 349       mb_uv_offset += 8;
 350     }
 351
 352     mb_y_offset += 16 * (f->y_stride - mb_cols);
 353     mb_uv_offset += 8 * (f->uv_stride - mb_cols);
 354   }
 355
 356   // Restore input state
 357   for (i = 0; i < MAX_MB_PLANE; i++)
 358     mbd->plane[i].pre[0].buf = input_buffer[i];
 359 }
 360
 361 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
 362   VP9_COMMON *const cm = &cpi->common;
 363
 364   int frame = 0;
 365
 366   int frames_to_blur_backward = 0;
 367   int frames_to_blur_forward = 0;
 368   int frames_to_blur = 0;
 369   int start_frame = 0;
 370
 371   int strength = cpi->active_arnr_strength;
 372   int blur_type = cpi->oxcf.arnr_type;
 373   int max_frames = cpi->active_arnr_frames;
 374
 375   const int num_frames_backward = distance;
 376   const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
 377                                - (num_frames_backward + 1);
 378
 379   struct scale_factors scale;
 380   struct scale_factors_common scale_comm;
 381
 382   switch (blur_type) {
 383     case 1:
 384       // Backward Blur
 385       frames_to_blur_backward = num_frames_backward;
 386
 387       if (frames_to_blur_backward >= max_frames)
 388         frames_to_blur_backward = max_frames - 1;
 389
 390       frames_to_blur = frames_to_blur_backward + 1;
 391       break;
 392
 393     case 2:
 394       // Forward Blur
 395
 396       frames_to_blur_forward = num_frames_forward;
 397
 398       if (frames_to_blur_forward >= max_frames)
 399         frames_to_blur_forward = max_frames - 1;
 400
 401       frames_to_blur = frames_to_blur_forward + 1;
 402       break;
 403
 404     case 3:
 405     default:
 406       // Center Blur
 407       frames_to_blur_forward = num_frames_forward;
 408       frames_to_blur_backward = num_frames_backward;
 409
 410       if (frames_to_blur_forward > frames_to_blur_backward)
 411         frames_to_blur_forward = frames_to_blur_backward;
 412
 413       if (frames_to_blur_backward > frames_to_blur_forward)
 414         frames_to_blur_backward = frames_to_blur_forward;
 415
 416       // When max_frames is even we have 1 more frame backward than forward
 417       if (frames_to_blur_forward > (max_frames - 1) / 2)
 418         frames_to_blur_forward = ((max_frames - 1) / 2);
 419
 420       if (frames_to_blur_backward > (max_frames / 2))
 421         frames_to_blur_backward = (max_frames / 2);
 422
 423       frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
 424       break;
 425   }
 426
 427   start_frame = distance + frames_to_blur_forward;
 428
 429 #ifdef DEBUGFWG
 430   // DEBUG FWG
 431   printf(
 432       "max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d "
 433       "start:%d",
 434       max_frames, num_frames_backward, num_frames_forward, frames_to_blur,
 435       frames_to_blur_backward, frames_to_blur_forward, cpi->source_encode_index,
 436       cpi->last_alt_ref_sei, start_frame);
 437 #endif
 438
 439   // Setup scaling factors. Scaling on each of the arnr frames is not supported
 440   vp9_setup_scale_factors_for_frame(&scale, &scale_comm,
 441       get_frame_new_buffer(cm)->y_crop_width,
 442       get_frame_new_buffer(cm)->y_crop_height,
 443       cm->width, cm->height);
 444
 445   // Setup frame pointers, NULL indicates frame not included in filter
 446   vp9_zero(cpi->frames);
 447   for (frame = 0; frame < frames_to_blur; frame++) {
 448     int which_buffer = start_frame - frame;
 449     struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
 450                                                      which_buffer);
 451     cpi->frames[frames_to_blur - 1 - frame] = &buf->img;
 452   }
 453
 454   temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward,
 455                             strength, &scale);
 456 }
 457
 458 void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,
 459                            const int group_boost) {
 460   int half_gf_int;
 461   int frames_after_arf;
 462   int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
 463   int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
 464   int q;
 465
 466   // Define the arnr filter width for this group of frames:
 467   // We only filter frames that lie within a distance of half
 468   // the GF interval from the ARF frame. We also have to trap
 469   // cases where the filter extends beyond the end of clip.
 470   // Note: this_frame->frame has been updated in the loop
 471   // so it now points at the ARF frame.
 472   half_gf_int = cpi->baseline_gf_interval >> 1;
 473   frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1);
 474
 475   switch (cpi->oxcf.arnr_type) {
 476     case 1:  // Backward filter
 477       frames_fwd = 0;
 478       if (frames_bwd > half_gf_int)
 479         frames_bwd = half_gf_int;
 480       break;
 481
 482     case 2:  // Forward filter
 483       if (frames_fwd > half_gf_int)
 484         frames_fwd = half_gf_int;
 485       if (frames_fwd > frames_after_arf)
 486         frames_fwd = frames_after_arf;
 487       frames_bwd = 0;
 488       break;
 489
 490     case 3:  // Centered filter
 491     default:
 492       frames_fwd >>= 1;
 493       if (frames_fwd > frames_after_arf)
 494         frames_fwd = frames_after_arf;
 495       if (frames_fwd > half_gf_int)
 496         frames_fwd = half_gf_int;
 497
 498       frames_bwd = frames_fwd;
 499
 500       // For even length filter there is one more frame backward
 501       // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
 502       if (frames_bwd < half_gf_int)
 503         frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1;
 504       break;
 505   }
 506
 507   cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
 508
 509   // Adjust the strength based on active max q
 510   q = ((int)vp9_convert_qindex_to_q(cpi->active_worst_quality) >> 1);
 511   if (q > 8) {
 512     cpi->active_arnr_strength = cpi->oxcf.arnr_strength;
 513   } else {
 514     cpi->active_arnr_strength = cpi->oxcf.arnr_strength - (8 - q);
 515     if (cpi->active_arnr_strength < 0)
 516       cpi->active_arnr_strength = 0;
 517   }
 518
 519   // Adjust number of frames in filter and strength based on gf boost level.
 520   if (cpi->active_arnr_frames > (group_boost / 150)) {
 521     cpi->active_arnr_frames = (group_boost / 150);
 522     cpi->active_arnr_frames += !(cpi->active_arnr_frames & 1);
 523   }
 524   if (cpi->active_arnr_strength > (group_boost / 300)) {
 525     cpi->active_arnr_strength = (group_boost / 300);
 526   }
 527 }