AV1_COMMON_SRCS-yes += common/warped_motion.h
AV1_COMMON_SRCS-yes += common/warped_motion.c
endif
+ifeq ($(CONFIG_CLPF),yes)
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf.h
+endif
ifeq ($(CONFIG_DERING),yes)
AV1_COMMON_SRCS-yes += common/od_dering.c
AV1_COMMON_SRCS-yes += common/od_dering.h
ifeq ($(CONFIG_DERING),yes)
AV1_CX_SRCS-yes += encoder/pickdering.c
endif
+ifeq ($(CONFIG_CLPF),yes)
+AV1_CX_SRCS-yes += encoder/clpf_rdo.c
+AV1_CX_SRCS-yes += encoder/clpf_rdo.h
+endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/clpf.h"
+#include "aom_dsp/aom_dsp_common.h"
-// Apply the filter on a single block
-static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
- int dstride, int has_top, int has_left, int has_bottom,
- int has_right, int width, int height) {
- int x, y;
+// Number of bits used to code cm->clpf_numblocks in the frame header.
+//
+// Both frame dimensions (mi units times MAX_MIB_SIZE) are rounded up to a
+// multiple of 1 << (clpf_size + 4); the resulting area is divided by the area
+// of one such square, and the position of the most significant bit of that
+// quotient, plus one, is returned.
+int av1_clpf_maxbits(const AV1_COMMON *cm) {
+  // log2 of the alignment applied to each dimension.
+  const int align_log2 = cm->clpf_size + 4;
+  const int padded_cols =
+      ALIGN_POWER_OF_TWO(cm->mi_cols * MAX_MIB_SIZE, align_log2);
+  const int padded_rows =
+      ALIGN_POWER_OF_TWO(cm->mi_rows * MAX_MIB_SIZE, align_log2);
+  // Padded area expressed in units of (1 << align_log2)^2.
+  const int num_units = padded_cols * padded_rows >> (cm->clpf_size * 2 + 8);
+
+  return get_msb(num_units) + 1;
+}
+
+// Compute the filter correction for sample X from six neighbouring samples.
+//
+// Each neighbour's difference to X is clamped to [-b, b]. The clamped
+// differences are combined with weights 4, 1, 3, 3, 1, 4 (for A..F) and the
+// weighted sum is scaled down by 16 with rounding. The value returned is the
+// offset to add to X, not the filtered sample itself.
+int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
+  const int dA = clamp(A - X, -b, b);
+  const int dB = clamp(B - X, -b, b);
+  const int dC = clamp(C - X, -b, b);
+  const int dD = clamp(D - X, -b, b);
+  const int dE = clamp(E - X, -b, b);
+  const int dF = clamp(F - X, -b, b);
+  const int delta = 4 * dA + dB + 3 * dC + 3 * dD + dE + 4 * dF;
+
+  // Negative sums get an extra -1 before the shift; the shift itself relies
+  // on '>>' being an arithmetic shift for negative operands.
+  return (8 + delta - (delta < 0)) >> 4;
+}
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- int X = src[(y + 0) * sstride + x + 0];
- int A = has_top ? src[(y - 1) * sstride + x + 0] : X;
- int B = has_left ? src[(y + 0) * sstride + x - 1] : X;
- int C = has_right ? src[(y + 0) * sstride + x + 1] : X;
- int D = has_bottom ? src[(y + 1) * sstride + x + 0] : X;
- int delta = ((A > X) + (B > X) + (C > X) + (D > X) > 2) -
- ((A < X) + (B < X) + (C < X) + (D < X) > 2);
- dst[y * dstride + x] = X + delta;
+// Filter the sizex x sizey block whose top-left sample is (x0, y0).
+//
+// 'src' and 'dst' point to the top-left sample of the plane and share the
+// same 'stride'. Neighbour coordinates are clamped to [0, width - 1] and
+// [0, height - 1], so samples on the frame edge reuse the edge sample.
+static void clpf_block(const uint8_t *src, uint8_t *dst, int stride, int x0,
+                       int y0, int sizex, int sizey, int width, int height,
+                       unsigned int strength) {
+  int row, col;
+  for (row = y0; row < y0 + sizey; row++) {
+    const uint8_t *cur = src + row * stride;
+    const uint8_t *above = src + AOMMAX(0, row - 1) * stride;
+    const uint8_t *below = src + AOMMIN(height - 1, row + 1) * stride;
+    uint8_t *out = dst + row * stride;
+    for (col = x0; col < x0 + sizex; col++) {
+      const int left2 = AOMMAX(0, col - 2);
+      const int left1 = AOMMAX(0, col - 1);
+      const int right1 = AOMMIN(width - 1, col + 1);
+      const int right2 = AOMMIN(width - 1, col + 2);
+      const int X = cur[col];
+      out[col] = X + av1_clpf_sample(X, above[col], cur[left2], cur[left1],
+                                     cur[right1], cur[right2], below[col],
+                                     strength);
}
}
}
-#define BS (MI_SIZE * MAX_MIB_SIZE)
-
-// Iterate over blocks within a superblock
-static void av1_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
- const AV1_COMMON *cm, MACROBLOCKD *xd,
- MODE_INFO *const *mi_8x8, int xpos, int ypos) {
- // Temporary buffer (to allow SIMD parallelism)
- uint8_t buf_unaligned[BS * BS + 15];
- uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
- int x, y, p;
+// Apply the constrained low-pass filter (CLPF) to the luma plane of 'rec' and
+// write the result to the luma plane of 'dst'.
+//
+// The frame is processed in filter blocks of (1 << fb_size_log2) samples per
+// side, each made of bs x bs sub-blocks (bs = MAX_MIB_SIZE):
+//  - A filter block whose sub-blocks are all skip coded is copied unchanged
+//    and does not consume an entry of 'blocks'.
+//  - Otherwise, if 'enable_fb_flag' is zero the block is filtered; if it is
+//    non-zero, 'decision' is called with a pointer to this block's entry in
+//    'blocks' and the block is filtered only when it returns non-zero.
+//  - Inside a filtered block, skip coded sub-blocks are copied unchanged.
+//
+// 'org' is not read here; it is only forwarded to 'decision'.
+//
+// Returns the number of filter blocks that were not entirely skip coded,
+// i.e. the number of 'blocks' entries that were made available to 'decision'.
+int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
+                   const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+                   int enable_fb_flag, unsigned int strength,
+                   unsigned int fb_size_log2, uint8_t *blocks,
+                   int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
+                                   const YV12_BUFFER_CONFIG *,
+                                   const AV1_COMMON *cm, int, int, int,
+                                   unsigned int, unsigned int, uint8_t *)) {
+  /* Constrained low-pass filter (CLPF) */
+  int c, k, l, m, n;
+  int width = rec->y_crop_width;
+  int height = rec->y_crop_height;
+  int xpos, ypos;
+  int stride_y = rec->y_stride;
+  const int bs = MAX_MIB_SIZE;
+  int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;
+  int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
+  int block_index = 0;
- for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
- for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
- for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
- const MB_MODE_INFO *mbmi =
- &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
-
- // Do not filter if there is no residual
- if (!mbmi->skip) {
- // Do not filter frame edges
- int has_top = ypos + y > 0;
- int has_left = xpos + x > 0;
- int has_bottom = ypos + y < cm->mi_rows - 1;
- int has_right = xpos + x < cm->mi_cols - 1;
-#if CLPF_ALLOW_BLOCK_PARALLELISM
- // Do not filter superblock edges
- has_top &= !!y;
- has_left &= !!x;
- has_bottom &= y != MAX_MIB_SIZE - 1;
- has_right &= x != MAX_MIB_SIZE - 1;
-#endif
- av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
- clpf_block(
- xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
- ? buf + y * MI_SIZE * BS + x * MI_SIZE
- : xd->plane[p].dst.buf,
- xd->plane[p].dst.stride,
- CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
- has_top, has_left, has_bottom, has_right,
- MI_SIZE >> xd->plane[p].subsampling_x,
- MI_SIZE >> xd->plane[p].subsampling_y);
+  // Iterate over all filter blocks
+  for (k = 0; k < num_fb_ver; k++) {
+    for (l = 0; l < num_fb_hor; l++) {
+      int h, w;
+      int allskip = 1;
+      // Stop scanning as soon as one non-skip sub-block inside the frame is
+      // found.
+      for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) {
+        for (n = 0; allskip && n < (1 << fb_size_log2) / bs; n++) {
+          xpos = (l << fb_size_log2) + n * bs;
+          ypos = (k << fb_size_log2) + m * bs;
+          if (xpos < width && ypos < height) {
+            allskip &=
+                cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
+                    ->mbmi.skip;
+          }
}
}
- }
-#if CLPF_ALLOW_PIXEL_PARALLELISM
- for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
- for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
- const MB_MODE_INFO *mbmi =
- &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
- av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
- if (!mbmi->skip) {
- int i = 0;
- for (i = 0; i<MI_SIZE>> xd->plane[p].subsampling_y; i++)
- memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
- buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
- MI_SIZE >> xd->plane[p].subsampling_x);
+
+      // Calculate the actual filter block size near frame edges: the
+      // remainder of the clipped extent, or a full block when the remainder
+      // is zero.
+      h = AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
+      w = AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
+      h += !h << fb_size_log2;
+      w += !w << fb_size_log2;
+      if (!allskip &&  // Do not filter the block if all is skip encoded
+          (!enable_fb_flag ||
+           decision(k, l, rec, org, cm, bs, w / bs, h / bs, strength,
+                    fb_size_log2, blocks + block_index))) {
+        // Iterate over all smaller blocks inside the filter block
+        for (m = 0; m < (h + bs - 1) / bs; m++) {
+          for (n = 0; n < (w + bs - 1) / bs; n++) {
+            xpos = (l << fb_size_log2) + n * bs;
+            ypos = (k << fb_size_log2) + m * bs;
+            if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
+                     ->mbmi.skip) {
+              // Not skip block, apply the filter
+              clpf_block(rec->y_buffer, dst->y_buffer, stride_y, xpos, ypos, bs,
+                         bs, width, height, strength);
+            } else {  // Skip block, copy instead
+              // Copy one bs-wide row at a time with memcpy rather than
+              // through a uint64_t pointer cast, which would break the
+              // effective-type (strict aliasing) and alignment rules. The
+              // width matches the bs x bs footprint used by clpf_block().
+              for (c = 0; c < bs; c++)
+                memcpy(dst->y_buffer + (ypos + c) * stride_y + xpos,
+                       rec->y_buffer + (ypos + c) * stride_y + xpos, bs);
+            }
+          }
}
+      } else {  // All skip, or not selected by 'decision': copy the block
+        for (m = 0; m < h; m++)
+          memcpy(dst->y_buffer + ((k << fb_size_log2) + m) * stride_y +
+                     (l << fb_size_log2),
+                 rec->y_buffer + ((k << fb_size_log2) + m) * stride_y +
+                     (l << fb_size_log2),
+                 w);
}
+      // Every filter block that is not all skip owns one 'blocks' entry,
+      // whether or not it ended up being filtered.
+      block_index += !allskip;
}
-#endif
}
-}
-
-// Iterate over the superblocks of an entire frame
-void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm,
- MACROBLOCKD *xd) {
- int x, y;
- for (y = 0; y < cm->mi_rows; y += MAX_MIB_SIZE)
- for (x = 0; x < cm->mi_cols; x += MAX_MIB_SIZE)
- av1_clpf_sb(frame, cm, xd, cm->mi_grid_visible, x, y);
+  return block_index;
}
#include "av1/common/reconinter.h"
-// Configuration
-#define CLPF_ALLOW_PIXEL_PARALLELISM \
- 1 // 1 = SIMD friendly (adds a buffer requirement)
-#define CLPF_ALLOW_BLOCK_PARALLELISM \
- 0 // 1 = MT friendly (degrades quality slighty)
-#define CLPF_FILTER_ALL_PLANES \
- 0 // 1 = filter both luma and chroma, 0 = filter only luma
+#define MAX_FB_SIZE 128  // Largest filter block size (samples per side)
-void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm,
- MACROBLOCKD *xd);
+int av1_clpf_maxbits(const AV1_COMMON *cm);  // Bits used to code clpf_numblocks
+int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);  // Offset to add to X
+int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
+ const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+ int enable_fb_flag, unsigned int strength,
+ unsigned int fb_size_log2, uint8_t *blocks,
+ int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
+ const YV12_BUFFER_CONFIG *,
+ const AV1_COMMON *cm, int, int, int,
+ unsigned int, unsigned int, uint8_t *));  // Returns the number of filter blocks that are not all skip
#endif
int use_highbitdepth;
#endif
#if CONFIG_CLPF
- int clpf;
+ int clpf_numblocks;
+ int clpf_size;
+ int clpf_strength;
+ uint8_t *clpf_blocks;
#endif
YV12_BUFFER_CONFIG *frame_to_show;
#if CONFIG_CLPF
static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
- cm->clpf = aom_rb_read_literal(rb, 1);
+ cm->clpf_blocks = 0;
+ cm->clpf_strength = aom_rb_read_literal(rb, 2);
+ if (cm->clpf_strength) {
+ cm->clpf_size = aom_rb_read_literal(rb, 2);
+ if (cm->clpf_size) {
+ int i;
+ cm->clpf_numblocks = aom_rb_read_literal(rb, av1_clpf_maxbits(cm));
+ CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(cm->clpf_numblocks));
+ for (i = 0; i < cm->clpf_numblocks; i++) {
+ cm->clpf_blocks[i] = aom_rb_read_literal(rb, 1);
+ }
+ }
+ }
+}
+
+// Decision callback passed to av1_clpf_frame(): returns the flag stored at
+// 'bit' and ignores every other argument.
+static int clpf_bit(int k, int l, const YV12_BUFFER_CONFIG *rec,
+                    const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+                    int block_size, int w, int h, unsigned int strength,
+                    unsigned int fb_size_log2, uint8_t *bit) {
+  (void)k;
+  (void)l;
+  (void)rec;
+  (void)org;
+  (void)cm;
+  (void)block_size;
+  (void)w;
+  (void)h;
+  (void)strength;
+  (void)fb_size_log2;
+  return *bit;
}
#endif
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_CLPF
- if (cm->clpf && !cm->skip_loop_filter)
- av1_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
+ if (cm->clpf_strength && !cm->skip_loop_filter) {
+ YV12_BUFFER_CONFIG dst; // Buffer for the result
+
+ dst = pbi->cur_buf->buf;
+ CHECK_MEM_ERROR(cm, dst.y_buffer, aom_malloc(dst.y_stride * dst.y_height));
+
+ av1_clpf_frame(&dst, &pbi->cur_buf->buf, 0, cm, !!cm->clpf_size,
+ cm->clpf_strength + (cm->clpf_strength == 3),
+ 4 + cm->clpf_size, cm->clpf_blocks, clpf_bit);
+
+ // Copy result
+ memcpy(pbi->cur_buf->buf.y_buffer, dst.y_buffer,
+ dst.y_height * dst.y_stride);
+ aom_free(dst.y_buffer);
+ }
+ if (cm->clpf_blocks) aom_free(cm->clpf_blocks);
#endif
#if CONFIG_DERING
if (cm->dering_level && !cm->skip_loop_filter) {
#if CONFIG_CLPF
static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
- aom_wb_write_literal(wb, cm->clpf, 1);
+ aom_wb_write_literal(wb, cm->clpf_strength, 2);
+ if (cm->clpf_strength) {
+ aom_wb_write_literal(wb, cm->clpf_size, 2);
+ if (cm->clpf_size) {
+ int i;
+ // TODO(stemidts): The number of bits to transmit could be
+ // implicitly deduced if transmitted after the filter block or
+ // after the frame (when it's known whether the block is all
+ // skip and implicitly unfiltered). And the bits do not have
+ // 50% probability, so a more efficient coding is possible.
+ aom_wb_write_literal(wb, cm->clpf_numblocks, av1_clpf_maxbits(cm));
+ for (i = 0; i < cm->clpf_numblocks; i++) {
+ aom_wb_write_literal(wb, cm->clpf_blocks[i], 1);
+ }
+ }
+ }
}
#endif
--- /dev/null
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/clpf.h"
+#include "aom/aom_integer.h"
+#include "av1/common/quant_common.h"
+
+// Calculate the error of a filtered and unfiltered block
+static void detect_clpf(const uint8_t *rec, const uint8_t *org, int x0, int y0,
+ int width, int height, int so, int stride, int *sum0,
+ int *sum1, unsigned int strength) {
+ int x, y;
+ for (y = y0; y < y0 + 8; y++) {
+ for (x = x0; x < x0 + 8; x++) {
+ int O = org[y * so + x];
+ int X = rec[y * stride + x];
+ int A = rec[AOMMAX(0, y - 1) * stride + x];
+ int B = rec[y * stride + AOMMAX(0, x - 2)];
+ int C = rec[y * stride + AOMMAX(0, x - 1)];
+ int D = rec[y * stride + AOMMIN(width - 1, x + 1)];
+ int E = rec[y * stride + AOMMIN(width - 1, x + 2)];
+ int F = rec[AOMMIN(height - 1, y + 1) * stride + x];
+ int delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
+ int Y = X + delta;
+ *sum0 += (O - X) * (O - X);
+ *sum1 += (O - Y) * (O - Y);
+ }
+ }
+}
+
+static void detect_multi_clpf(const uint8_t *rec, const uint8_t *org, int x0,
+ int y0, int width, int height, int so, int stride,
+ int *sum) {
+ int x, y;
+
+ for (y = y0; y < y0 + 8; y++) {
+ for (x = x0; x < x0 + 8; x++) {
+ int O = org[y * so + x];
+ int X = rec[y * stride + x];
+ int A = rec[AOMMAX(0, y - 1) * stride + x];
+ int B = rec[y * stride + AOMMAX(0, x - 2)];
+ int C = rec[y * stride + AOMMAX(0, x - 1)];
+ int D = rec[y * stride + AOMMIN(width - 1, x + 1)];
+ int E = rec[y * stride + AOMMIN(width - 1, x + 2)];
+ int F = rec[AOMMIN(height - 1, y + 1) * stride + x];
+ int delta1 = av1_clpf_sample(X, A, B, C, D, E, F, 1);
+ int delta2 = av1_clpf_sample(X, A, B, C, D, E, F, 2);
+ int delta3 = av1_clpf_sample(X, A, B, C, D, E, F, 4);
+ int F1 = X + delta1;
+ int F2 = X + delta2;
+ int F3 = X + delta3;
+ sum[0] += (O - X) * (O - X);
+ sum[1] += (O - F1) * (O - F1);
+ sum[2] += (O - F2) * (O - F2);
+ sum[3] += (O - F3) * (O - F3);
+ }
+ }
+}
+
+// Decide whether filter block (k, l) should be filtered.
+//
+// Sums the squared error against 'org' over the w x h grid of sub-blocks
+// ('block_size' samples apart) that are not skip coded, with and without the
+// filter at 'strength'. Stores 1 in *res when the filtered error is strictly
+// smaller, 0 otherwise, and returns the same value.
+int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
+                      const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+                      int block_size, int w, int h, unsigned int strength,
+                      unsigned int fb_size_log2, uint8_t *res) {
+  const int mib = MAX_MIB_SIZE;
+  int err_unfiltered = 0, err_filtered = 0;
+  int row, col;
+
+  for (row = 0; row < h; row++) {
+    for (col = 0; col < w; col++) {
+      const int xpos = (l << fb_size_log2) + col * block_size;
+      const int ypos = (k << fb_size_log2) + row * block_size;
+      if (cm->mi_grid_visible[ypos / mib * cm->mi_stride + xpos / mib]
+              ->mbmi.skip)
+        continue;  // Skip coded sub-blocks are never filtered
+      detect_clpf(rec->y_buffer, org->y_buffer, xpos, ypos, rec->y_crop_width,
+                  rec->y_crop_height, org->y_stride, rec->y_stride,
+                  &err_unfiltered, &err_filtered, strength);
+    }
+  }
+  *res = err_filtered < err_unfiltered;
+  return *res;
+}
+
+// Calculate the square error of all filter settings. Result:
+// res[0][0] : unfiltered
+// res[0][1-3] : strength=1,2,4, no signals
+// res[1][0] : (bit count, fb size = 128)
+// res[1][1-3] : strength=1,2,4, fb size = 128
+// res[2][0] : (bit count, fb size = 64)
+// res[2][1-3] : strength=1,2,4, fb size = 64
+// res[3][0] : (bit count, fb size = 32)
+// res[3][1-3] : strength=1,2,4, fb size = 32
+//
+// (y, x) is the top-left sample of the area, w x h its size in units of
+// 'block_size' samples. While 'fb_size_log2' is above get_msb(MAX_FB_SIZE) - 3
+// the area is split into up to four quadrants of half the size and the
+// function recurses; below that the errors of all non-skip sub-blocks are
+// measured directly and added to every row of 'res'.
+//
+// Returns non-zero if the area contains at least one non-skip sub-block.
+static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
+                    const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+                    unsigned int block_size, unsigned int fb_size_log2, int w,
+                    int h, int64_t res[4][4]) {
+  int i, m, n, filtered = 0;
+  int sum[4];
+  int bslog = get_msb(block_size);
+  sum[0] = sum[1] = sum[2] = sum[3] = 0;
+  if (fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
+    int w1, h1, w2, h2, level;
+    // Saved copies of the running totals. They are frame-wide accumulations
+    // held in int64_t entries of 'res', so they must not be narrowed to int.
+    int64_t sum1, sum2, sum3, oldfiltered;
+
+    fb_size_log2--;
+    w1 = AOMMIN(1 << (fb_size_log2 - bslog), w);
+    h1 = AOMMIN(1 << (fb_size_log2 - bslog), h);
+    w2 = AOMMIN(w - (1 << (fb_size_log2 - bslog)), w >> 1);
+    h2 = AOMMIN(h - (1 << (fb_size_log2 - bslog)), h >> 1);
+    // Row of 'res' that belongs to the filter block size being split.
+    level = get_msb(MAX_FB_SIZE) - fb_size_log2;
+    sum1 = res[level][1];
+    sum2 = res[level][2];
+    sum3 = res[level][3];
+    oldfiltered = res[level][0];
+    // Cleared so that, after the recursion, it holds only the unfiltered
+    // error contributed by this area.
+    res[level][0] = 0;
+
+    filtered =
+        clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1, res);
+    if (1 << (fb_size_log2 - bslog) < w)
+      filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size,
+                           fb_size_log2, w2, h1, res);
+    if (1 << (fb_size_log2 - bslog) < h) {
+      filtered |= clpf_rdo(y + (1 << fb_size_log2), x, rec, org, cm, block_size,
+                           fb_size_log2, w1, h2, res);
+      filtered |= clpf_rdo(y + (1 << fb_size_log2), x + (1 << fb_size_log2),
+                           rec, org, cm, block_size, fb_size_log2, w2, h2, res);
+    }
+
+    // Per strength, keep the better of leaving this area unfiltered
+    // (previous total + its unfiltered error) and filtering it.
+    res[level][1] = AOMMIN(sum1 + res[level][0], res[level][1]);
+    res[level][2] = AOMMIN(sum2 + res[level][0], res[level][2]);
+    res[level][3] = AOMMIN(sum3 + res[level][0], res[level][3]);
+    res[level][0] = oldfiltered + filtered;  // Number of signal bits
+    return filtered;
+  }
+
+  for (m = 0; m < h; m++) {
+    for (n = 0; n < w; n++) {
+      int xpos = x + n * block_size;
+      int ypos = y + m * block_size;
+      if (!cm->mi_grid_visible[ypos / MAX_MIB_SIZE * cm->mi_stride +
+                               xpos / MAX_MIB_SIZE]
+               ->mbmi.skip) {
+        detect_multi_clpf(rec->y_buffer, org->y_buffer, xpos, ypos,
+                          rec->y_crop_width, rec->y_crop_height, org->y_stride,
+                          rec->y_stride, sum);
+        filtered = 1;
+      }
+    }
+  }
+
+  for (i = 0; i < 4; i++) {
+    res[i][0] += sum[0];
+    res[i][1] += sum[1];
+    res[i][2] += sum[2];
+    res[i][3] += sum[3];
+  }
+  return filtered;
+}
+
+// Search for the best CLPF strength and filter block size for a frame.
+//
+// Squared errors for every strength / block-size combination are collected
+// with clpf_rdo(), the signalled combinations are penalised with an estimated
+// bit cost, and the cheapest one is selected.
+//
+// Outputs:
+//   *best_strength : 0 (do not filter), 1, 2 or 4.
+//   *best_bs       : 7, 6 or 5 when the winner comes from the fb size 128, 64
+//                    or 32 row of the error table, 0 when it comes from the
+//                    row without per-block signalling.
+void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
+                         const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+                         int *best_strength, int *best_bs) {
+  static const double lambda_square[] = {
+    // exp((i - 15.4244) / 8.4010)
+    0.159451, 0.179607, 0.202310, 0.227884, 0.256690, 0.289138, 0.325687,
+    0.366856, 0.413230, 0.465465, 0.524303, 0.590579, 0.665233, 0.749323,
+    0.844044, 0.950737, 1.070917, 1.206289, 1.358774, 1.530533, 1.724004,
+    1.941931, 2.187406, 2.463911, 2.775368, 3.126195, 3.521370, 3.966498,
+    4.467893, 5.032669, 5.668837, 6.385421, 7.192586, 8.101784, 9.125911,
+    10.27949, 11.57890, 13.04256, 14.69124, 16.54832, 18.64016, 20.99641,
+    23.65052, 26.64013, 30.00764, 33.80084, 38.07352, 42.88630, 48.30746,
+    54.41389, 61.29221, 69.04002, 77.76720, 87.59756, 98.67056, 111.1432,
+    125.1926, 141.0179, 158.8436, 178.9227, 201.5399, 227.0160, 255.7126,
+    288.0366
+  };
+  int i, j, k, l;
+  int64_t best, sums[4][4];
+  int width = rec->y_crop_width, height = rec->y_crop_height;
+  const int bs = MAX_MIB_SIZE;
+  int fb_size_log2 = get_msb(MAX_FB_SIZE);
+  int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
+  int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;
+  // The lambda lookup does not depend on the table row, so do it once.
+  const double lambda =
+      lambda_square[av1_get_qindex(&cm->seg, 0, cm->base_qindex) >> 2];
+
+  memset(sums, 0, sizeof(sums));
+
+  for (k = 0; k < num_fb_ver; k++) {
+    for (l = 0; l < num_fb_hor; l++) {
+      // Calculate the block size after frame border clipping
+      int h =
+          AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
+      int w =
+          AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
+      h += !h << fb_size_log2;
+      w += !w << fb_size_log2;
+      clpf_rdo(k << fb_size_log2, l << fb_size_log2, rec, org, cm, bs,
+               fb_size_log2, w / bs, h / bs, sums);
+    }
+  }
+
+  for (j = 0; j < 4; j++) {
+    // Estimate the bit costs and adjust the square errors.
+    // For j == 0, sums[j][0] is the whole-frame unfiltered error rather than
+    // a bit count, so the product can exceed INT_MAX; converting such a
+    // double to int would be undefined. The cost is therefore kept in 64
+    // bits (it is only applied when both i and j are non-zero).
+    const int64_t cost =
+        (int64_t)(1.2 * lambda * (sums[j][0] + 2 + 2 * (j > 0)) + 0.5);
+    // Pack the table position into the low four bits so that the minimum
+    // search below also yields the winning (row, column).
+    for (i = 0; i < 4; i++)
+      sums[j][i] = ((sums[j][i] + (i && j) * cost) << 4) + j * 4 + i;
+  }
+
+  best = (int64_t)1 << 62;
+  // Column 0 of rows 1-3 holds a bit count, not an error, and is skipped.
+  for (i = 0; i < 4; i++)
+    for (j = 0; j < 4; j++)
+      if ((!i || j) && sums[i][j] < best) best = sums[i][j];
+  best &= 15;
+  *best_bs = (best > 3) * (5 + (best < 12) + (best < 8));
+  *best_strength = best ? 1 << ((best - 1) & 3) : 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_CLPF_H_
+#define AV1_ENCODER_CLPF_H_
+
+#include "av1/common/reconinter.h"
+/* Decide whether filter block (k, l) should be filtered: compares the squared
+ * error against 'org' of the non-skip sub-blocks with and without the filter
+ * at 'strength'. Stores 1 in *res if filtering gives the smaller error, 0
+ * otherwise, and returns the stored value.
+ */
+int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
+ const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+ int block_size, int w, int h, unsigned int strength,
+ unsigned int fb_size_log2, uint8_t *res);
+/* Search the filter strengths and filter block sizes for a frame. Writes the
+ * chosen strength (0, 1, 2 or 4) to *best_strength and 7, 6, 5 or 0 to
+ * *best_bs (0 when the choice uses no per-block signalling).
+ */
+void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
+ const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+ int *best_strength, int *best_bs);
+
+#endif  // AV1_ENCODER_CLPF_H_
#include "av1/common/alloccommon.h"
#if CONFIG_CLPF
#include "av1/common/clpf.h"
+#include "av1/encoder/clpf_rdo.h"
#endif
#if CONFIG_DERING
#include "av1/common/dering.h"
av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
#endif
}
-#if CONFIG_DERING
- if (is_lossless_requested(&cpi->oxcf)) {
- cm->dering_level = 0;
- } else {
- cm->dering_level =
- av1_dering_search(cm->frame_to_show, cpi->Source, cm, xd);
- av1_dering_frame(cm->frame_to_show, cm, xd, cm->dering_level);
- }
-#endif // CONFIG_DERING
-
#if CONFIG_CLPF
- cm->clpf = 0;
+ cm->clpf_strength = 0;
+ cm->clpf_size = 2;
+ CHECK_MEM_ERROR(
+ cm, cm->clpf_blocks,
+ aom_malloc(((cm->frame_to_show->y_crop_width + 31) & ~31) *
+ ((cm->frame_to_show->y_crop_height + 31) & ~31) >>
+ 10));
if (!is_lossless_requested(&cpi->oxcf)) {
// Test CLPF
int i, hq = 1;
- uint64_t before, after;
// TODO(yaowu): investigate per-segment CLPF decision and
// an optimal threshold, use 80 for now.
for (i = 0; i < MAX_SEGMENTS; i++)
hq &= av1_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
- if (!hq) { // Don't try filter if the entire image is nearly losslessly
- // encoded
-#if CLPF_FILTER_ALL_PLANES
- aom_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
- before = aom_get_y_sse(cpi->Source, cm->frame_to_show) +
- aom_get_u_sse(cpi->Source, cm->frame_to_show) +
- aom_get_v_sse(cpi->Source, cm->frame_to_show);
- av1_clpf_frame(cm->frame_to_show, cm, xd);
- after = aom_get_y_sse(cpi->Source, cm->frame_to_show) +
- aom_get_u_sse(cpi->Source, cm->frame_to_show) +
- aom_get_v_sse(cpi->Source, cm->frame_to_show);
-#else
- aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
- before = aom_get_y_sse(cpi->Source, cm->frame_to_show);
- av1_clpf_frame(cm->frame_to_show, cm, xd);
- after = aom_get_y_sse(cpi->Source, cm->frame_to_show);
-#endif
- if (before < after) {
-// No improvement, restore original
-#if CLPF_FILTER_ALL_PLANES
- aom_yv12_copy_frame(&cpi->last_frame_uf, cm->frame_to_show);
-#else
- aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
-#endif
+ // Don't try filter if the entire image is nearly losslessly encoded
+ if (!hq) {
+ // Find the best strength and block size for the entire frame
+ int fb_size_log2, strength;
+ av1_clpf_test_frame(&cpi->last_frame_uf, cpi->Source, cm, &strength,
+ &fb_size_log2);
+
+ if (!fb_size_log2) fb_size_log2 = get_msb(MAX_FB_SIZE);
+
+ if (!strength) { // Better to disable for the whole frame?
+ cm->clpf_strength = 0;
} else {
- cm->clpf = 1;
+ // Apply the filter using the chosen strength
+ cm->clpf_strength = strength - (strength == 4);
+ cm->clpf_size =
+ fb_size_log2 ? fb_size_log2 - get_msb(MAX_FB_SIZE) + 3 : 0;
+ aom_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
+ cm->clpf_numblocks =
+ av1_clpf_frame(cm->frame_to_show, &cpi->last_frame_uf, cpi->Source,
+ cm, !!cm->clpf_size, strength, 4 + cm->clpf_size,
+ cm->clpf_blocks, av1_clpf_decision);
}
}
}
#endif
+#if CONFIG_DERING
+ if (is_lossless_requested(&cpi->oxcf)) {
+ cm->dering_level = 0;
+ } else {
+ cm->dering_level =
+ av1_dering_search(cm->frame_to_show, cpi->Source, cm, xd);
+ av1_dering_frame(cm->frame_to_show, cm, xd, cm->dering_level);
+ }
+#endif // CONFIG_DERING
#if CONFIG_LOOP_RESTORATION
if (cm->rst_info.restoration_type != RESTORE_NONE) {
av1_loop_restoration_init(&cm->rst_internal, &cm->rst_info,
if (cm->show_frame) dump_filtered_recon_frames(cpi);
#endif // DUMP_RECON_FRAMES
+#if CONFIG_CLPF
+ aom_free(cm->clpf_blocks);
+#endif
+
if (cm->seg.update_map) update_reference_segmentation_map(cpi);
if (frame_is_intra_only(cm) == 0) {