Move CLPF block signals from frame to SB level.

author Steinar Midtskogen <stemidts@cisco.com>

Wed, 21 Sep 2016 11:38:16 +0000 (13:38 +0200)

committer Yaowu Xu <yaowu@google.com>

Thu, 13 Oct 2016 23:06:10 +0000 (16:06 -0700)
author Steinar Midtskogen <stemidts@cisco.com>
Wed, 21 Sep 2016 11:38:16 +0000 (13:38 +0200)
committer Yaowu Xu <yaowu@google.com>
Thu, 13 Oct 2016 23:06:10 +0000 (16:06 -0700)
diff --git a/av1/common/clpf.c b/av1/common/clpf.c

index 1cf52724d84cd9890afe162222a479ec6433806c..a01e6b461b79ccc7307144390a8c7bab479321e8 100644 (file)
--- a/av1/common/clpf.c
+++ b/av1/common/clpf.c
@@ -14,14 +14,6 @@
  #include "aom/aom_image.h"
  #include "aom_dsp/aom_dsp_common.h"
  
-int av1_clpf_maxbits(const AV1_COMMON *cm) {
-  return get_msb(
-             ALIGN_POWER_OF_TWO(cm->mi_cols * MI_SIZE, cm->clpf_size + 4) *
-                 ALIGN_POWER_OF_TWO(cm->mi_rows * MI_SIZE, cm->clpf_size + 4) >>
-             (cm->clpf_size * 2 + 8)) +
-         1;
-}
-
  int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
    int delta = 4 * clamp(A - X, -b, b) + clamp(B - X, -b, b) +
                3 * clamp(C - X, -b, b) + 3 * clamp(D - X, -b, b) +
@@ -73,14 +65,14 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
  #endif
  
  // Return number of filtered blocks
-int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
-                   const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
-                   int enable_fb_flag, unsigned int strength,
-                   unsigned int fb_size_log2, uint8_t *blocks, int plane,
-                   int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
-                                   const YV12_BUFFER_CONFIG *,
-                                   const AV1_COMMON *cm, int, int, int,
-                                   unsigned int, unsigned int, uint8_t *)) {
+void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
+                    const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
+                    int enable_fb_flag, unsigned int strength,
+                    unsigned int fb_size_log2, int plane,
+                    int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
+                                    const YV12_BUFFER_CONFIG *,
+                                    const AV1_COMMON *cm, int, int, int,
+                                    unsigned int, unsigned int, int8_t *)) {
    /* Constrained low-pass filter (CLPF) */
    int c, k, l, m, n;
    const int subx = plane != AOM_PLANE_Y && frame->subsampling_x;
@@ -95,7 +87,6 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
    int dstride = bs;
    const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
    const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
-  int block_index = 0;
    uint8_t *cache = NULL;
    uint8_t **cache_ptr = NULL;
    uint8_t **cache_dst = NULL;
@@ -125,7 +116,7 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
    for (k = 0; k < num_fb_ver; k++) {
      for (l = 0; l < num_fb_hor; l++) {
        int h, w;
-      int allskip = 1;
+      int allskip = !(enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2);
        const int xoff = l << fb_size_log2;
        const int yoff = k << fb_size_log2;
        for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) {
@@ -148,8 +139,11 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
        w += !w << fb_size_log2;
        if (!allskip &&  // Do not filter the block if all is skip encoded
            (!enable_fb_flag ||
+           // Only called if fb_flag enabled (luma only)
             decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength,
-                    fb_size_log2, blocks + block_index))) {
+                    fb_size_log2,
+                    cm->clpf_blocks + yoff / MIN_FB_SIZE * cm->clpf_stride +
+                        xoff / MIN_FB_SIZE))) {
          // Iterate over all smaller blocks inside the filter block
          for (m = 0; m < ((h + bs - 1) >> bslog); m++) {
            for (n = 0; n < ((w + bs - 1) >> bslog); n++) {
@@ -160,8 +154,9 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
              sizey = AOMMIN(height - ypos, bs);
              if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
                                       (xpos << subx) / MI_SIZE]
-                     ->mbmi.skip) {  // Not skip block
-              // Temporary buffering needed if filtering in-place
+                     ->mbmi.skip ||
+                (enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2)) {
+              // Temporary buffering needed for in-place filtering
                if (cache_ptr[cache_idx]) {
  // Copy filtered block back into the frame
  #if CONFIG_AOM_HIGHBITDEPTH
@@ -247,7 +242,6 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
            }
          }
        }
-      block_index += !allskip;  // Count number of blocks filtered
      }
    }
  
@@ -287,6 +281,4 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
    aom_free(cache);
    aom_free(cache_ptr);
    aom_free(cache_dst);
-
-  return block_index;
  }
diff --git a/av1/common/clpf.h b/av1/common/clpf.h

index 8e4213b201a6bcfa5c8e0624bec24932073ff313..fc74f2ccadc8cdd245a67595b40865adc02e87d1 100644 (file)
--- a/av1/common/clpf.h
+++ b/av1/common/clpf.h
@@ -13,17 +13,19 @@
  
  #include "av1/common/reconinter.h"
  
-#define MAX_FB_SIZE 128
+#define MAX_FB_SIZE_LOG2 7
+#define MIN_FB_SIZE_LOG2 5
+#define MAX_FB_SIZE (1 << MAX_FB_SIZE_LOG2)
+#define MIN_FB_SIZE (1 << MIN_FB_SIZE_LOG2)
  
-int av1_clpf_maxbits(const AV1_COMMON *cm);
  int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);
-int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
-                   const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
-                   int enable_fb_flag, unsigned int strength,
-                   unsigned int fb_size_log2, uint8_t *blocks, int plane,
-                   int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
-                                   const YV12_BUFFER_CONFIG *,
-                                   const AV1_COMMON *cm, int, int, int,
-                                   unsigned int, unsigned int, uint8_t *));
+void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
+                    const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
+                    int enable_fb_flag, unsigned int strength,
+                    unsigned int fb_size_log2, int plane,
+                    int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
+                                    const YV12_BUFFER_CONFIG *,
+                                    const AV1_COMMON *cm, int, int, int,
+                                    unsigned int, unsigned int, int8_t *));
  
  #endif
diff --git a/av1/common/enums.h b/av1/common/enums.h

index c8776ef252729ec59554e5660e7c2bcc63090181..7c632554273b975dae9dba551ba6be0e05067ca6 100644 (file)
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -246,7 +246,17 @@ typedef enum {
    PALETTE_COLORS
  } PALETTE_COLOR;
  
+#ifdef CONFIG_CLPF
+#define CLPF_NOFLAG -1
+typedef enum {
+  CLPF_NOSIZE = 0,
+  CLPF_32X32 = 1,
+  CLPF_64X64 = 2,
+  CLPF_128X128 = 3
+} CLPF_BLOCK_SIZE;
+#endif
  typedef enum ATTRIBUTE_PACKED {
+
    DC_PRED,    // Average of above and left pixels
    V_PRED,     // Vertical
    H_PRED,     // Horizontal
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h

index 98f4f517c7b2d1fba534a41c4a931db3ae90f7e7..3a2203ac11d65b063b8cf9c7daad9b30ef0c16bb 100644 (file)
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -151,12 +151,27 @@ typedef struct AV1Common {
    int use_highbitdepth;
  #endif
  #if CONFIG_CLPF
-  int clpf_numblocks;
-  int clpf_size;
+  // Two bits are used to signal the strength for all blocks and the
+  // valid values are:
+  // 0: no filtering
+  // 1: strength = 1
+  // 2: strength = 2
+  // 3: strength = 4
    int clpf_strength_y;
    int clpf_strength_u;
    int clpf_strength_v;
-  uint8_t *clpf_blocks;
+
+  // If clpf_strength_y is not 0, another two bits are used to signal
+  // the filter block size.  The valid values for clpf_size are:
+  // 0: no block signalling
+  // 1: 32x32
+  // 2: 64x64
+  // 3: 128x128
+  CLPF_BLOCK_SIZE clpf_size;
+
+  // Buffer for storing whether to filter individual blocks.
+  int8_t *clpf_blocks;
+  int clpf_stride;
  #endif
  
    YV12_BUFFER_CONFIG *frame_to_show;
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c

index e1bbe24f9eff641c0238a870f8c7e2ad7390937c..e41f16609139bc8a79b7be37e33b5183b57faf29 100644 (file)
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1440,6 +1440,22 @@ static int read_skip(AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
    }
  }
  #endif  // CONFIG_SUPERTX
+#if CONFIG_CLPF
+static int clpf_all_skip(const AV1_COMMON *cm, int mi_col, int mi_row,
+                         int size) {
+  int r, c;
+  int skip = 1;
+  const int maxc = AOMMIN(size, cm->mi_cols - mi_col);
+  const int maxr = AOMMIN(size, cm->mi_rows - mi_row);
+  for (r = 0; r < maxr && skip; r++) {
+    for (c = 0; c < maxc && skip; c++) {
+      skip &= !!cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]
+                    ->mbmi.skip;
+    }
+  }
+  return skip;
+}
+#endif
  
  // TODO(slavarnway): eliminate bsize and subsize in future commits
  static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
@@ -1772,6 +1788,43 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
    if (bsize >= BLOCK_8X8 &&
        (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
      dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
+
+#if CONFIG_CLPF
+  if (bsize == BLOCK_64X64 && cm->clpf_strength_y &&
+      cm->clpf_size != CLPF_NOSIZE) {
+    const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride +
+                   mi_col * MI_SIZE / MIN_FB_SIZE;
+
+    if (!((mi_row * MI_SIZE) & 127) && !((mi_col * MI_SIZE) & 127) &&
+        cm->clpf_size == CLPF_128X128) {
+      cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
+    } else if (cm->clpf_size == CLPF_64X64 &&
+               !clpf_all_skip(cm, mi_col, mi_row, 64 / MI_SIZE)) {
+      cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
+    } else if (cm->clpf_size == CLPF_32X32) {
+      const int tr = tl + 1;
+      const int bl = tl + cm->clpf_stride;
+      const int br = tr + cm->clpf_stride;
+      const int size = 32 / MI_SIZE;
+
+      // Up to four bits per SB
+      if (!clpf_all_skip(cm, mi_col, mi_row, size))
+        cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
+
+      if (mi_col + size < cm->mi_cols &&
+          !clpf_all_skip(cm, mi_col + size, mi_row, size))
+        cm->clpf_blocks[tr] = aom_read_literal(r, 1, ACCT_STR);
+
+      if (mi_row + size < cm->mi_rows &&
+          !clpf_all_skip(cm, mi_col, mi_row + size, size))
+        cm->clpf_blocks[bl] = aom_read_literal(r, 1, ACCT_STR);
+
+      if (mi_col + size < cm->mi_cols && mi_row + size < cm->mi_rows &&
+          !clpf_all_skip(cm, mi_col + size, mi_row + size, size))
+        cm->clpf_blocks[br] = aom_read_literal(r, 1, ACCT_STR);
+    }
+  }
+#endif
  #if CONFIG_DERING
    if (bsize == BLOCK_64X64) {
      if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
@@ -2045,20 +2098,26 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
  }
  
  #if CONFIG_CLPF
-static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+static void setup_clpf(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) {
+  AV1_COMMON *const cm = &pbi->common;
+  const int width = pbi->cur_buf->buf.y_crop_width;
+  const int height = pbi->cur_buf->buf.y_crop_height;
+
    cm->clpf_blocks = 0;
    cm->clpf_strength_y = aom_rb_read_literal(rb, 2);
    cm->clpf_strength_u = aom_rb_read_literal(rb, 2);
    cm->clpf_strength_v = aom_rb_read_literal(rb, 2);
    if (cm->clpf_strength_y) {
      cm->clpf_size = aom_rb_read_literal(rb, 2);
-    if (cm->clpf_size) {
-      int i;
-      cm->clpf_numblocks = aom_rb_read_literal(rb, av1_clpf_maxbits(cm));
-      CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(cm->clpf_numblocks));
-      for (i = 0; i < cm->clpf_numblocks; i++) {
-        cm->clpf_blocks[i] = aom_rb_read_literal(rb, 1);
-      }
+    if (cm->clpf_size != CLPF_NOSIZE) {
+      int size;
+      cm->clpf_stride =
+          ((width + MIN_FB_SIZE - 1) & ~(MIN_FB_SIZE - 1)) >> MIN_FB_SIZE_LOG2;
+      size =
+          cm->clpf_stride * ((height + MIN_FB_SIZE - 1) & ~(MIN_FB_SIZE - 1)) >>
+          MIN_FB_SIZE_LOG2;
+      CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(size));
+      memset(cm->clpf_blocks, -1, size);
      }
    }
  }
@@ -2068,7 +2127,7 @@ static int clpf_bit(UNUSED int k, UNUSED int l,
                      UNUSED const YV12_BUFFER_CONFIG *org,
                      UNUSED const AV1_COMMON *cm, UNUSED int block_size,
                      UNUSED int w, UNUSED int h, UNUSED unsigned int strength,
-                    UNUSED unsigned int fb_size_log2, uint8_t *bit) {
+                    UNUSED unsigned int fb_size_log2, int8_t *bit) {
    return *bit;
  }
  #endif
@@ -3361,7 +3420,7 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
  
    setup_loopfilter(cm, rb);
  #if CONFIG_CLPF
-  setup_clpf(cm, rb);
+  setup_clpf(pbi, rb);
  #endif
  #if CONFIG_DERING
    setup_dering(cm, rb);
@@ -3933,18 +3992,18 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
    if (!cm->skip_loop_filter) {
      const YV12_BUFFER_CONFIG *const frame = &pbi->cur_buf->buf;
      if (cm->clpf_strength_y) {
-      av1_clpf_frame(frame, NULL, cm, !!cm->clpf_size,
+      av1_clpf_frame(frame, NULL, cm, cm->clpf_size != CLPF_NOSIZE,
                       cm->clpf_strength_y + (cm->clpf_strength_y == 3),
-                     4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, clpf_bit);
+                     4 + cm->clpf_size, AOM_PLANE_Y, clpf_bit);
      }
      if (cm->clpf_strength_u) {
-      av1_clpf_frame(frame, NULL, cm, 0,
-                     cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4, NULL,
+      av1_clpf_frame(frame, NULL, cm, 0,  // No block signals for chroma
+                     cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4,
                       AOM_PLANE_U, NULL);
      }
      if (cm->clpf_strength_v) {
-      av1_clpf_frame(frame, NULL, cm, 0,
-                     cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4, NULL,
+      av1_clpf_frame(frame, NULL, cm, 0,  // No block signals for chroma
+                     cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4,
                       AOM_PLANE_V, NULL);
      }
    }
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c

index aaffebbcb4b867433a4c9a573b711a0345eed9bb..6604728d6a256a15184f298c2da0135f995071f0 100644 (file)
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1869,6 +1869,37 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
        (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
      update_partition_context(xd, mi_row, mi_col, subsize, bsize);
  
+#if CONFIG_CLPF
+  if (bsize == BLOCK_64X64 && cm->clpf_blocks && cm->clpf_strength_y &&
+      cm->clpf_size != CLPF_NOSIZE) {
+    const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride +
+                   mi_col * MI_SIZE / MIN_FB_SIZE;
+    const int tr = tl + 1;
+    const int bl = tl + cm->clpf_stride;
+    const int br = tr + cm->clpf_stride;
+
+    // Up to four bits per SB.
+    // When clpf_size indicates a size larger than the SB size
+    // (CLPF_128X128), one bit for every fourth SB will be transmitted
+    // regardless of skip blocks.
+    if (cm->clpf_blocks[tl] != CLPF_NOFLAG)
+      aom_write_literal(w, cm->clpf_blocks[tl], 1);
+
+    if (mi_col + MI_SIZE / 2 < cm->mi_cols &&
+        cm->clpf_blocks[tr] != CLPF_NOFLAG)
+      aom_write_literal(w, cm->clpf_blocks[tr], 1);
+
+    if (mi_row + MI_SIZE / 2 < cm->mi_rows &&
+        cm->clpf_blocks[bl] != CLPF_NOFLAG)
+      aom_write_literal(w, cm->clpf_blocks[bl], 1);
+
+    if (mi_row + MI_SIZE / 2 < cm->mi_rows &&
+        mi_col + MI_SIZE / 2 < cm->mi_cols &&
+        cm->clpf_blocks[br] != CLPF_NOFLAG)
+      aom_write_literal(w, cm->clpf_blocks[br], 1);
+  }
+#endif
+
  #if CONFIG_DERING
    if (bsize == BLOCK_64X64 && cm->dering_level != 0 &&
        !sb_all_skip(cm, mi_row, mi_col)) {
@@ -2533,18 +2564,6 @@ static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
    aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
    if (cm->clpf_strength_y) {
      aom_wb_write_literal(wb, cm->clpf_size, 2);
-    if (cm->clpf_size) {
-      int i;
-      // TODO(stemidts): The number of bits to transmit could be
-      // implicitly deduced if transmitted after the filter block or
-      // after the frame (when it's known whether the block is all
-      // skip and implicitly unfiltered).  And the bits do not have
-      // 50% probability, so a more efficient coding is possible.
-      aom_wb_write_literal(wb, cm->clpf_numblocks, av1_clpf_maxbits(cm));
-      for (i = 0; i < cm->clpf_numblocks; i++) {
-        aom_wb_write_literal(wb, cm->clpf_blocks ? cm->clpf_blocks[i] : 0, 1);
-      }
-    }
    }
  }
  #endif
diff --git a/av1/encoder/clpf_rdo.c b/av1/encoder/clpf_rdo.c

index 1d498f11b18f82286ccc29812102e1c69f49eefe..4e652b6cb441da36948aaf56354733a71b653937 100644 (file)
--- a/av1/encoder/clpf_rdo.c
+++ b/av1/encoder/clpf_rdo.c
@@ -127,14 +127,15 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
  int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
                        const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                        int block_size, int w, int h, unsigned int strength,
-                      unsigned int fb_size_log2, uint8_t *res) {
+                      unsigned int fb_size_log2, int8_t *res) {
    int m, n, sum0 = 0, sum1 = 0;
  
    for (m = 0; m < h; m++) {
      for (n = 0; n < w; n++) {
        int xpos = (l << fb_size_log2) + n * block_size;
        int ypos = (k << fb_size_log2) + m * block_size;
-      if (!cm->mi_grid_visible[ypos / MI_SIZE * cm->mi_stride + xpos / MI_SIZE]
+      if (fb_size_log2 == MAX_FB_SIZE_LOG2 ||
+          !cm->mi_grid_visible[ypos / MI_SIZE * cm->mi_stride + xpos / MI_SIZE]
                 ->mbmi.skip) {
  #if CONFIG_AOM_HIGHBITDEPTH
          if (cm->use_highbitdepth) {
@@ -167,6 +168,8 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
  // (Only for luma:)
  // res[1][0]   : (bit count, fb size = 128)
  // res[1][1-3] : strength=1,2,4, fb size = 128
+// res[1][4]   : unfiltered, including skip
+// res[1][5-7] : strength=1,2,4, including skip, fb_size = 128
  // res[2][0]   : (bit count, fb size = 64)
  // res[2][1-3] : strength=1,2,4, fb size = 64
  // res[3][0]   : (bit count, fb size = 32)
@@ -174,9 +177,9 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
  static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
                      const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                      unsigned int block_size, unsigned int fb_size_log2, int w,
-                    int h, int64_t res[4][4], int plane) {
+                    int h, int64_t res[4][8], int plane) {
    int c, m, n, filtered = 0;
-  int sum[4];
+  int sum[8];
    const int subx = plane != AOM_PLANE_Y && rec->subsampling_x;
    const int suby = plane != AOM_PLANE_Y && rec->subsampling_y;
    int bslog = get_msb(block_size);
@@ -193,12 +196,12 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
        plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
    int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
    int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
-  sum[0] = sum[1] = sum[2] = sum[3] = 0;
+  sum[0] = sum[1] = sum[2] = sum[3] = sum[4] = sum[5] = sum[6] = sum[7] = 0;
    if (plane == AOM_PLANE_Y &&
        fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
      int w1, h1, w2, h2, i, sum1, sum2, sum3, oldfiltered;
  
-    fb_size_log2--;
+    filtered = fb_size_log2-- == MAX_FB_SIZE_LOG2;
      w1 = AOMMIN(1 << (fb_size_log2 - bslog), w);
      h1 = AOMMIN(1 << (fb_size_log2 - bslog), h);
      w2 = AOMMIN(w - (1 << (fb_size_log2 - bslog)), w >> 1);
@@ -210,8 +213,8 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
      oldfiltered = res[i][0];
      res[i][0] = 0;
  
-    filtered = clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1,
-                        res, plane);
+    filtered |= clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1,
+                         res, plane);
      if (1 << (fb_size_log2 - bslog) < w)
        filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size,
                             fb_size_log2, w2, h1, res, plane);
@@ -223,10 +226,18 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
                     cm, block_size, fb_size_log2, w2, h2, res, plane);
      }
  
+    // Correct sums for unfiltered blocks
      res[i][1] = AOMMIN(sum1 + res[i][0], res[i][1]);
      res[i][2] = AOMMIN(sum2 + res[i][0], res[i][2]);
      res[i][3] = AOMMIN(sum3 + res[i][0], res[i][3]);
+    if (i == 1) {
+      res[i][5] = AOMMIN(sum1 + res[i][4], res[i][5]);
+      res[i][6] = AOMMIN(sum2 + res[i][4], res[i][6]);
+      res[i][7] = AOMMIN(sum3 + res[i][4], res[i][7]);
+    }
+
      res[i][0] = oldfiltered + filtered;  // Number of signal bits
+
      return filtered;
    }
  
@@ -234,27 +245,28 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
      for (n = 0; n < w; n++) {
        int xpos = x + n * block_size;
        int ypos = y + m * block_size;
-      if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
-                               (xpos << subx) / MI_SIZE]
-               ->mbmi.skip) {
+      int skip =  // Filtered skip blocks stored only for fb_size == 128
+          4 *
+          !!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
+                                (xpos << subx) / MI_SIZE]
+                ->mbmi.skip;
  #if CONFIG_AOM_HIGHBITDEPTH
-        if (cm->use_highbitdepth) {
-          aom_clpf_detect_multi_hbd(
-              CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
-              rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum,
-              cm->bit_depth - 8, block_size);
-        } else {
-          aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
-                                xpos, ypos, rec_width, rec_height, sum,
-                                block_size);
-        }
-#else
+      if (cm->use_highbitdepth) {
+        aom_clpf_detect_multi_hbd(CONVERT_TO_SHORTPTR(rec_buffer),
+                                  CONVERT_TO_SHORTPTR(org_buffer), rec_stride,
+                                  org_stride, xpos, ypos, rec_width, rec_height,
+                                  sum + skip, cm->bit_depth - 8, block_size);
+      } else {
          aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
-                              xpos, ypos, rec_width, rec_height, sum,
+                              xpos, ypos, rec_width, rec_height, sum + skip,
                                block_size);
-#endif
-        filtered = 1;
        }
+#else
+      aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
+                            xpos, ypos, rec_width, rec_height, sum + skip,
+                            block_size);
+#endif
+      filtered |= !skip;
      }
    }
  
@@ -263,6 +275,12 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
      res[c][1] += sum[1];
      res[c][2] += sum[2];
      res[c][3] += sum[3];
+    if (c != 1) continue;
+    // Only needed when fb_size == 128
+    res[c][4] += sum[4];
+    res[c][5] += sum[5];
+    res[c][6] += sum[6];
+    res[c][7] += sum[7];
    }
    return filtered;
  }
@@ -271,7 +289,7 @@ void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
                           const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                           int *best_strength, int *best_bs, int plane) {
    int c, j, k, l;
-  int64_t best, sums[4][4];
+  int64_t best, sums[4][8];
    int width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width;
    int height = plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
    const int bs = MI_SIZE;
@@ -303,8 +321,14 @@ void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
        }
      }
  
-  if (plane != AOM_PLANE_Y)  // Slightly favour unfiltered chroma
+  // For fb_size == 128 skip blocks are included in the result.
+  if (plane == AOM_PLANE_Y) {
+    sums[1][1] += sums[1][5] - sums[1][4];
+    sums[1][2] += sums[1][6] - sums[1][4];
+    sums[1][3] += sums[1][7] - sums[1][4];
+  } else {  // Slightly favour unfiltered chroma
      sums[0][0] -= sums[0][0] >> 7;
+  }
  
    for (j = 0; j < 4; j++) {
      static const double lambda_square[] = {
diff --git a/av1/encoder/clpf_rdo.h b/av1/encoder/clpf_rdo.h

index bb85fbcea4b0f31ca07de5e8a9b6d37c2cc52694..586eed03d45a1b57e3c0c1d0ee7d30b62db38524 100644 (file)
--- a/av1/encoder/clpf_rdo.h
+++ b/av1/encoder/clpf_rdo.h
@@ -17,7 +17,7 @@
  int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
                        const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                        int block_size, int w, int h, unsigned int strength,
-                      unsigned int fb_size_log2, uint8_t *res);
+                      unsigned int fb_size_log2, int8_t *res);
  
  void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
                           const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c

index 90b0416fc27d1bf5cd90a8502c7466bd5e581ba0..af7e5a63b4d3b4c2f4149c21b55637e522a78484 100644 (file)
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3408,12 +3408,23 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
    }
  #if CONFIG_CLPF
    cm->clpf_strength_y = cm->clpf_strength_u = cm->clpf_strength_v = 0;
-  cm->clpf_size = 2;
-  CHECK_MEM_ERROR(
-      cm, cm->clpf_blocks,
-      aom_malloc(((cm->frame_to_show->y_crop_width + 31) & ~31) *
-                     ((cm->frame_to_show->y_crop_height + 31) & ~31) >>
-                 10));
+  cm->clpf_size = CLPF_64X64;
+
+  // Allocate buffer to hold the status of all filter blocks:
+  // 1 = On, 0 = off, -1 = implicitly off
+  {
+    int size;
+    cm->clpf_stride = ((cm->frame_to_show->y_crop_width + MIN_FB_SIZE - 1) &
+                       ~(MIN_FB_SIZE - 1)) >>
+                      MIN_FB_SIZE_LOG2;
+    size = cm->clpf_stride *
+               ((cm->frame_to_show->y_crop_height + MIN_FB_SIZE - 1) &
+                ~(MIN_FB_SIZE - 1)) >>
+           MIN_FB_SIZE_LOG2;
+    CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(size));
+    memset(cm->clpf_blocks, CLPF_NOFLAG, size);
+  }
+
    if (!is_lossless_requested(&cpi->oxcf)) {
      const YV12_BUFFER_CONFIG *const frame = cm->frame_to_show;
  
@@ -3428,20 +3439,18 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
        // Apply the filter using the chosen strength
        cm->clpf_strength_y = strength_y - (strength_y == 4);
        cm->clpf_size =
-          fb_size_log2 ? fb_size_log2 - get_msb(MAX_FB_SIZE) + 3 : 0;
-      cm->clpf_numblocks = av1_clpf_frame(
-          frame, cpi->Source, cm, !!cm->clpf_size, strength_y,
-          4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, av1_clpf_decision);
+          fb_size_log2 ? fb_size_log2 - MAX_FB_SIZE_LOG2 + 3 : CLPF_NOSIZE;
+      av1_clpf_frame(frame, cpi->Source, cm, cm->clpf_size != CLPF_NOSIZE,
+                     strength_y, 4 + cm->clpf_size, AOM_PLANE_Y,
+                     av1_clpf_decision);
      }
      if (strength_u) {
        cm->clpf_strength_u = strength_u - (strength_u == 4);
-      av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, NULL, AOM_PLANE_U,
-                     NULL);
+      av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, AOM_PLANE_U, NULL);
      }
      if (strength_v) {
        cm->clpf_strength_v = strength_v - (strength_v == 4);
-      av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, NULL, AOM_PLANE_V,
-                     NULL);
+      av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, AOM_PLANE_V, NULL);
      }
    }
  #endif
author	Steinar Midtskogen <stemidts@cisco.com>
	Wed, 21 Sep 2016 11:38:16 +0000 (13:38 +0200)
committer	Yaowu Xu <yaowu@google.com>
	Thu, 13 Oct 2016 23:06:10 +0000 (16:06 -0700)
av1/common/clpf.c		patch \| blob \| history
av1/common/clpf.h		patch \| blob \| history
av1/common/enums.h		patch \| blob \| history
av1/common/onyxc_int.h		patch \| blob \| history
av1/decoder/decodeframe.c		patch \| blob \| history
av1/encoder/bitstream.c		patch \| blob \| history
av1/encoder/clpf_rdo.c		patch \| blob \| history
av1/encoder/clpf_rdo.h		patch \| blob \| history
av1/encoder/encoder.c		patch \| blob \| history