From: Yaowu Xu Date: Fri, 14 Oct 2016 15:39:03 +0000 (+0000) Subject: Revert "Move CLPF block signals from frame to SB level." X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9b25f3067485b32442e13964df098903736c3fd8;p=libvpx Revert "Move CLPF block signals from frame to SB level." This reverts commit 975350387ce0b55bf5af8cb944f6a242b72251ff. Change-Id: I9f8e891739352ca2bde4b294e37c85a668f416e0 --- diff --git a/av1/common/clpf.c b/av1/common/clpf.c index a01e6b461..1cf52724d 100644 --- a/av1/common/clpf.c +++ b/av1/common/clpf.c @@ -14,6 +14,14 @@ #include "aom/aom_image.h" #include "aom_dsp/aom_dsp_common.h" +int av1_clpf_maxbits(const AV1_COMMON *cm) { + return get_msb( + ALIGN_POWER_OF_TWO(cm->mi_cols * MI_SIZE, cm->clpf_size + 4) * + ALIGN_POWER_OF_TWO(cm->mi_rows * MI_SIZE, cm->clpf_size + 4) >> + (cm->clpf_size * 2 + 8)) + + 1; +} + int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) { int delta = 4 * clamp(A - X, -b, b) + clamp(B - X, -b, b) + 3 * clamp(C - X, -b, b) + 3 * clamp(D - X, -b, b) + @@ -65,14 +73,14 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride, #endif // Return number of filtered blocks -void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, - const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm, - int enable_fb_flag, unsigned int strength, - unsigned int fb_size_log2, int plane, - int (*decision)(int, int, const YV12_BUFFER_CONFIG *, - const YV12_BUFFER_CONFIG *, - const AV1_COMMON *cm, int, int, int, - unsigned int, unsigned int, int8_t *)) { +int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, + const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm, + int enable_fb_flag, unsigned int strength, + unsigned int fb_size_log2, uint8_t *blocks, int plane, + int (*decision)(int, int, const YV12_BUFFER_CONFIG *, + const YV12_BUFFER_CONFIG *, + const AV1_COMMON *cm, int, int, int, + unsigned int, unsigned int, uint8_t *)) { /* Constrained low-pass filter (CLPF) */ int c, k, l, m, n; const int subx = plane != AOM_PLANE_Y && frame->subsampling_x; @@ -87,6 +95,7 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, int dstride = bs; const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2; const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2; + int block_index = 0; uint8_t *cache = NULL; uint8_t **cache_ptr = NULL; uint8_t **cache_dst = NULL; @@ -116,7 +125,7 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, for (k = 0; k < num_fb_ver; k++) { for (l = 0; l < num_fb_hor; l++) { int h, w; - int allskip = !(enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2); + int allskip = 1; const int xoff = l << fb_size_log2; const int yoff = k << fb_size_log2; for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) { @@ -139,11 +148,8 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, w += !w << fb_size_log2; if (!allskip && // Do not filter the block if all is skip encoded (!enable_fb_flag || - // Only called if fb_flag enabled (luma only) decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength, - fb_size_log2, - cm->clpf_blocks + yoff / MIN_FB_SIZE * cm->clpf_stride + - xoff / MIN_FB_SIZE))) { + fb_size_log2, blocks + block_index))) { // Iterate over all smaller blocks inside the filter block for (m = 0; m < ((h + bs - 1) >> bslog); m++) { for (n = 0; n < ((w + bs - 1) >> bslog); n++) { @@ -154,9 +160,8 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, sizey = AOMMIN(height - ypos, bs); if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride + (xpos << subx) / MI_SIZE] - ->mbmi.skip || - (enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2)) { - // Temporary buffering needed for in-place filtering + ->mbmi.skip) { // Not skip block + // Temporary buffering needed if filtering in-place if (cache_ptr[cache_idx]) { // Copy filtered block back into the frame #if CONFIG_AOM_HIGHBITDEPTH @@ -242,6 +247,7 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, } } } + block_index += !allskip; // Count number of blocks filtered } } @@ -281,4 +287,6 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, aom_free(cache); aom_free(cache_ptr); aom_free(cache_dst); + + return block_index; } diff --git a/av1/common/clpf.h b/av1/common/clpf.h index fc74f2cca..8e4213b20 100644 --- a/av1/common/clpf.h +++ b/av1/common/clpf.h @@ -13,19 +13,17 @@ #include "av1/common/reconinter.h" -#define MAX_FB_SIZE_LOG2 7 -#define MIN_FB_SIZE_LOG2 5 -#define MAX_FB_SIZE (1 << MAX_FB_SIZE_LOG2) -#define MIN_FB_SIZE (1 << MIN_FB_SIZE_LOG2) +#define MAX_FB_SIZE 128 +int av1_clpf_maxbits(const AV1_COMMON *cm); int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b); -void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, - const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm, - int enable_fb_flag, unsigned int strength, - unsigned int fb_size_log2, int plane, - int (*decision)(int, int, const YV12_BUFFER_CONFIG *, - const YV12_BUFFER_CONFIG *, - const AV1_COMMON *cm, int, int, int, - unsigned int, unsigned int, int8_t *)); +int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, + const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm, + int enable_fb_flag, unsigned int strength, + unsigned int fb_size_log2, uint8_t *blocks, int plane, + int (*decision)(int, int, const YV12_BUFFER_CONFIG *, + const YV12_BUFFER_CONFIG *, + const AV1_COMMON *cm, int, int, int, + unsigned int, unsigned int, uint8_t *)); #endif diff --git a/av1/common/enums.h b/av1/common/enums.h index 7c6325542..c8776ef25 100644 --- a/av1/common/enums.h +++ b/av1/common/enums.h @@ -246,17 +246,7 @@ typedef enum { PALETTE_COLORS } PALETTE_COLOR; -#ifdef CONFIG_CLPF -#define CLPF_NOFLAG -1 -typedef enum { - CLPF_NOSIZE = 0, - CLPF_32X32 = 1, - CLPF_64X64 = 2, - CLPF_128X128 = 3 -} CLPF_BLOCK_SIZE; -#endif typedef enum ATTRIBUTE_PACKED { - DC_PRED, // Average of above and left pixels V_PRED, // Vertical H_PRED, // Horizontal diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h index 3a2203ac1..98f4f517c 100644 --- a/av1/common/onyxc_int.h +++ b/av1/common/onyxc_int.h @@ -151,27 +151,12 @@ typedef struct AV1Common { int use_highbitdepth; #endif #if CONFIG_CLPF - // Two bits are used to signal the strength for all blocks and the - // valid values are: - // 0: no filtering - // 1: strength = 1 - // 2: strength = 2 - // 3: strength = 4 + int clpf_numblocks; + int clpf_size; int clpf_strength_y; int clpf_strength_u; int clpf_strength_v; - - // If clpf_strength_y is not 0, another two bits are used to signal - // the filter block size. The valid values for clfp_size are: - // 0: no block signalling - // 1: 32x32 - // 2: 64x64 - // 3: 128x128 - CLPF_BLOCK_SIZE clpf_size; - - // Buffer for storing whether to filter individual blocks. - int8_t *clpf_blocks; - int clpf_stride; + uint8_t *clpf_blocks; #endif YV12_BUFFER_CONFIG *frame_to_show; diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index e41f16609..e1bbe24f9 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c @@ -1440,22 +1440,6 @@ static int read_skip(AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id, } } #endif // CONFIG_SUPERTX -#if CONFIG_CLPF -static int clpf_all_skip(const AV1_COMMON *cm, int mi_col, int mi_row, - int size) { - int r, c; - int skip = 1; - const int maxc = AOMMIN(size, cm->mi_cols - mi_col); - const int maxr = AOMMIN(size, cm->mi_rows - mi_row); - for (r = 0; r < maxr && skip; r++) { - for (c = 0; c < maxc && skip; c++) { - skip &= !!cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c] - ->mbmi.skip; - } - } - return skip; -} -#endif // TODO(slavarnway): eliminate bsize and subsize in future commits static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, @@ -1788,43 +1772,6 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh); - -#if CONFIG_CLPF - if (bsize == BLOCK_64X64 && cm->clpf_strength_y && - cm->clpf_size != CLPF_NOSIZE) { - const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride + - mi_col * MI_SIZE / MIN_FB_SIZE; - - if (!((mi_row * MI_SIZE) & 127) && !((mi_col * MI_SIZE) & 127) && - cm->clpf_size == CLPF_128X128) { - cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR); - } else if (cm->clpf_size == CLPF_64X64 && - !clpf_all_skip(cm, mi_col, mi_row, 64 / MI_SIZE)) { - cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR); - } else if (cm->clpf_size == CLPF_32X32) { - const int tr = tl + 1; - const int bl = tl + cm->clpf_stride; - const int br = tr + cm->clpf_stride; - const int size = 32 / MI_SIZE; - - // Up to four bits per SB - if (!clpf_all_skip(cm, mi_col, mi_row, size)) - cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR); - - if (mi_col + size < cm->mi_cols && - !clpf_all_skip(cm, mi_col + size, mi_row, size)) - cm->clpf_blocks[tr] = aom_read_literal(r, 1, ACCT_STR); - - if (mi_row + size < cm->mi_rows && - !clpf_all_skip(cm, mi_col, mi_row + size, size)) - cm->clpf_blocks[bl] = aom_read_literal(r, 1, ACCT_STR); - - if (mi_col + size < cm->mi_cols && mi_row + size < cm->mi_rows && - !clpf_all_skip(cm, mi_col + size, mi_row + size, size)) - cm->clpf_blocks[br] = aom_read_literal(r, 1, ACCT_STR); - } - } -#endif #if CONFIG_DERING if (bsize == BLOCK_64X64) { if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) { @@ -2098,26 +2045,20 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { } #if CONFIG_CLPF -static void setup_clpf(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) { - AV1_COMMON *const cm = &pbi->common; - const int width = pbi->cur_buf->buf.y_crop_width; - const int height = pbi->cur_buf->buf.y_crop_height; - +static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { cm->clpf_blocks = 0; cm->clpf_strength_y = aom_rb_read_literal(rb, 2); cm->clpf_strength_u = aom_rb_read_literal(rb, 2); cm->clpf_strength_v = aom_rb_read_literal(rb, 2); if (cm->clpf_strength_y) { cm->clpf_size = aom_rb_read_literal(rb, 2); - if (cm->clpf_size != CLPF_NOSIZE) { - int size; - cm->clpf_stride = - ((width + MIN_FB_SIZE - 1) & ~(MIN_FB_SIZE - 1)) >> MIN_FB_SIZE_LOG2; - size = - cm->clpf_stride * ((height + MIN_FB_SIZE - 1) & ~(MIN_FB_SIZE - 1)) >> - MIN_FB_SIZE_LOG2; - CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(size)); - memset(cm->clpf_blocks, -1, size); + if (cm->clpf_size) { + int i; + cm->clpf_numblocks = aom_rb_read_literal(rb, av1_clpf_maxbits(cm)); + CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(cm->clpf_numblocks)); + for (i = 0; i < cm->clpf_numblocks; i++) { + cm->clpf_blocks[i] = aom_rb_read_literal(rb, 1); + } } } } @@ -2127,7 +2068,7 @@ static int clpf_bit(UNUSED int k, UNUSED int l, UNUSED const YV12_BUFFER_CONFIG *org, UNUSED const AV1_COMMON *cm, UNUSED int block_size, UNUSED int w, UNUSED int h, UNUSED unsigned int strength, - UNUSED unsigned int fb_size_log2, int8_t *bit) { + UNUSED unsigned int fb_size_log2, uint8_t *bit) { return *bit; } #endif @@ -3420,7 +3361,7 @@ static size_t read_uncompressed_header(AV1Decoder *pbi, setup_loopfilter(cm, rb); #if CONFIG_CLPF - setup_clpf(pbi, rb); + setup_clpf(cm, rb); #endif #if CONFIG_DERING setup_dering(cm, rb); @@ -3992,18 +3933,18 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data, if (!cm->skip_loop_filter) { const YV12_BUFFER_CONFIG *const frame = &pbi->cur_buf->buf; if (cm->clpf_strength_y) { - av1_clpf_frame(frame, NULL, cm, cm->clpf_size != CLPF_NOSIZE, + av1_clpf_frame(frame, NULL, cm, !!cm->clpf_size, cm->clpf_strength_y + (cm->clpf_strength_y == 3), - 4 + cm->clpf_size, AOM_PLANE_Y, clpf_bit); + 4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, clpf_bit); } if (cm->clpf_strength_u) { - av1_clpf_frame(frame, NULL, cm, 0, // No block signals for chroma - cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4, + av1_clpf_frame(frame, NULL, cm, 0, + cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4, NULL, AOM_PLANE_U, NULL); } if (cm->clpf_strength_v) { - av1_clpf_frame(frame, NULL, cm, 0, // No block signals for chroma - cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4, + av1_clpf_frame(frame, NULL, cm, 0, + cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4, NULL, AOM_PLANE_V, NULL); } } diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index 6604728d6..aaffebbcb 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c @@ -1869,37 +1869,6 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) update_partition_context(xd, mi_row, mi_col, subsize, bsize); -#if CONFIG_CLPF - if (bsize == BLOCK_64X64 && cm->clpf_blocks && cm->clpf_strength_y && - cm->clpf_size != CLPF_NOSIZE) { - const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride + - mi_col * MI_SIZE / MIN_FB_SIZE; - const int tr = tl + 1; - const int bl = tl + cm->clpf_stride; - const int br = tr + cm->clpf_stride; - - // Up to four bits per SB. - // When clpf_size indicates a size larger than the SB size - // (CLPF_128X128), one bit for every fourth SB will be transmitted - // regardless of skip blocks. - if (cm->clpf_blocks[tl] != CLPF_NOFLAG) - aom_write_literal(w, cm->clpf_blocks[tl], 1); - - if (mi_col + MI_SIZE / 2 < cm->mi_cols && - cm->clpf_blocks[tr] != CLPF_NOFLAG) - aom_write_literal(w, cm->clpf_blocks[tr], 1); - - if (mi_row + MI_SIZE / 2 < cm->mi_rows && - cm->clpf_blocks[bl] != CLPF_NOFLAG) - aom_write_literal(w, cm->clpf_blocks[bl], 1); - - if (mi_row + MI_SIZE / 2 < cm->mi_rows && - mi_col + MI_SIZE / 2 < cm->mi_cols && - cm->clpf_blocks[br] != CLPF_NOFLAG) - aom_write_literal(w, cm->clpf_blocks[br], 1); - } -#endif - #if CONFIG_DERING if (bsize == BLOCK_64X64 && cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) { @@ -2564,6 +2533,18 @@ static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { aom_wb_write_literal(wb, cm->clpf_strength_v, 2); if (cm->clpf_strength_y) { aom_wb_write_literal(wb, cm->clpf_size, 2); + if (cm->clpf_size) { + int i; + // TODO(stemidts): The number of bits to transmit could be + // implicitly deduced if transmitted after the filter block or + // after the frame (when it's known whether the block is all + // skip and implicitly unfiltered). And the bits do not have + // 50% probability, so a more efficient coding is possible. + aom_wb_write_literal(wb, cm->clpf_numblocks, av1_clpf_maxbits(cm)); + for (i = 0; i < cm->clpf_numblocks; i++) { + aom_wb_write_literal(wb, cm->clpf_blocks ? cm->clpf_blocks[i] : 0, 1); + } + } } } #endif diff --git a/av1/encoder/clpf_rdo.c b/av1/encoder/clpf_rdo.c index 4e652b6cb..1d498f11b 100644 --- a/av1/encoder/clpf_rdo.c +++ b/av1/encoder/clpf_rdo.c @@ -127,15 +127,14 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org, int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, int block_size, int w, int h, unsigned int strength, - unsigned int fb_size_log2, int8_t *res) { + unsigned int fb_size_log2, uint8_t *res) { int m, n, sum0 = 0, sum1 = 0; for (m = 0; m < h; m++) { for (n = 0; n < w; n++) { int xpos = (l << fb_size_log2) + n * block_size; int ypos = (k << fb_size_log2) + m * block_size; - if (fb_size_log2 == MAX_FB_SIZE_LOG2 || - !cm->mi_grid_visible[ypos / MI_SIZE * cm->mi_stride + xpos / MI_SIZE] + if (!cm->mi_grid_visible[ypos / MI_SIZE * cm->mi_stride + xpos / MI_SIZE] ->mbmi.skip) { #if CONFIG_AOM_HIGHBITDEPTH if (cm->use_highbitdepth) { @@ -168,8 +167,6 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec, // (Only for luma:) // res[1][0] : (bit count, fb size = 128) // res[1][1-3] : strength=1,2,4, fb size = 128 -// res[1][4] : unfiltered, including skip -// res[1][5-7] : strength=1,2,4, including skip, fb_size = 128 // res[2][0] : (bit count, fb size = 64) // res[2][1-3] : strength=1,2,4, fb size = 64 // res[3][0] : (bit count, fb size = 32) @@ -177,9 +174,9 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec, static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, unsigned int block_size, unsigned int fb_size_log2, int w, - int h, int64_t res[4][8], int plane) { + int h, int64_t res[4][4], int plane) { int c, m, n, filtered = 0; - int sum[8]; + int sum[4]; const int subx = plane != AOM_PLANE_Y && rec->subsampling_x; const int suby = plane != AOM_PLANE_Y && rec->subsampling_y; int bslog = get_msb(block_size); @@ -196,12 +193,12 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height; int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride; int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride; - sum[0] = sum[1] = sum[2] = sum[3] = sum[4] = sum[5] = sum[6] = sum[7] = 0; + sum[0] = sum[1] = sum[2] = sum[3] = 0; if (plane == AOM_PLANE_Y && fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) { int w1, h1, w2, h2, i, sum1, sum2, sum3, oldfiltered; - filtered = fb_size_log2-- == MAX_FB_SIZE_LOG2; + fb_size_log2--; w1 = AOMMIN(1 << (fb_size_log2 - bslog), w); h1 = AOMMIN(1 << (fb_size_log2 - bslog), h); w2 = AOMMIN(w - (1 << (fb_size_log2 - bslog)), w >> 1); @@ -213,8 +210,8 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, oldfiltered = res[i][0]; res[i][0] = 0; - filtered |= clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1, - res, plane); + filtered = clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1, + res, plane); if (1 << (fb_size_log2 - bslog) < w) filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size, fb_size_log2, w2, h1, res, plane); @@ -226,18 +223,10 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, cm, block_size, fb_size_log2, w2, h2, res, plane); } - // Correct sums for unfiltered blocks res[i][1] = AOMMIN(sum1 + res[i][0], res[i][1]); res[i][2] = AOMMIN(sum2 + res[i][0], res[i][2]); res[i][3] = AOMMIN(sum3 + res[i][0], res[i][3]); - if (i == 1) { - res[i][5] = AOMMIN(sum1 + res[i][4], res[i][5]); - res[i][6] = AOMMIN(sum2 + res[i][4], res[i][6]); - res[i][7] = AOMMIN(sum3 + res[i][4], res[i][7]); - } - res[i][0] = oldfiltered + filtered; // Number of signal bits - return filtered; } @@ -245,28 +234,27 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, for (n = 0; n < w; n++) { int xpos = x + n * block_size; int ypos = y + m * block_size; - int skip = // Filtered skip blocks stored only for fb_size == 128 - 4 * - !!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride + - (xpos << subx) / MI_SIZE] - ->mbmi.skip; + if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride + + (xpos << subx) / MI_SIZE] + ->mbmi.skip) { #if CONFIG_AOM_HIGHBITDEPTH - if (cm->use_highbitdepth) { - aom_clpf_detect_multi_hbd(CONVERT_TO_SHORTPTR(rec_buffer), - CONVERT_TO_SHORTPTR(org_buffer), rec_stride, - org_stride, xpos, ypos, rec_width, rec_height, - sum + skip, cm->bit_depth - 8, block_size); - } else { + if (cm->use_highbitdepth) { + aom_clpf_detect_multi_hbd( + CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer), + rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum, + cm->bit_depth - 8, block_size); + } else { + aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride, + xpos, ypos, rec_width, rec_height, sum, + block_size); + } +#else aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride, - xpos, ypos, rec_width, rec_height, sum + skip, + xpos, ypos, rec_width, rec_height, sum, block_size); - } -#else - aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride, - xpos, ypos, rec_width, rec_height, sum + skip, - block_size); #endif - filtered |= !skip; + filtered = 1; + } } } @@ -275,12 +263,6 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, res[c][1] += sum[1]; res[c][2] += sum[2]; res[c][3] += sum[3]; - if (c != 1) continue; - // Only needed when fb_size == 128 - res[c][4] += sum[4]; - res[c][5] += sum[5]; - res[c][6] += sum[6]; - res[c][7] += sum[7]; } return filtered; } @@ -289,7 +271,7 @@ void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, int *best_strength, int *best_bs, int plane) { int c, j, k, l; - int64_t best, sums[4][8]; + int64_t best, sums[4][4]; int width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width; int height = plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height; const int bs = MI_SIZE; @@ -321,14 +303,8 @@ void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec, } } - // For fb_size == 128 skip blocks are included in the result. - if (plane == AOM_PLANE_Y) { - sums[1][1] += sums[1][5] - sums[1][4]; - sums[1][2] += sums[1][6] - sums[1][4]; - sums[1][3] += sums[1][7] - sums[1][4]; - } else { // Slightly favour unfiltered chroma + if (plane != AOM_PLANE_Y) // Slightly favour unfiltered chroma sums[0][0] -= sums[0][0] >> 7; - } for (j = 0; j < 4; j++) { static const double lambda_square[] = { diff --git a/av1/encoder/clpf_rdo.h b/av1/encoder/clpf_rdo.h index 586eed03d..bb85fbcea 100644 --- a/av1/encoder/clpf_rdo.h +++ b/av1/encoder/clpf_rdo.h @@ -17,7 +17,7 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, int block_size, int w, int h, unsigned int strength, - unsigned int fb_size_log2, int8_t *res); + unsigned int fb_size_log2, uint8_t *res); void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index af7e5a63b..90b0416fc 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c @@ -3408,23 +3408,12 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { } #if CONFIG_CLPF cm->clpf_strength_y = cm->clpf_strength_u = cm->clpf_strength_v = 0; - cm->clpf_size = CLPF_64X64; - - // Allocate buffer to hold the status of all filter blocks: - // 1 = On, 0 = off, -1 = implicitly off - { - int size; - cm->clpf_stride = ((cm->frame_to_show->y_crop_width + MIN_FB_SIZE - 1) & - ~(MIN_FB_SIZE - 1)) >> - MIN_FB_SIZE_LOG2; - size = cm->clpf_stride * - ((cm->frame_to_show->y_crop_height + MIN_FB_SIZE - 1) & - ~(MIN_FB_SIZE - 1)) >> - MIN_FB_SIZE_LOG2; - CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(size)); - memset(cm->clpf_blocks, CLPF_NOFLAG, size); - } - + cm->clpf_size = 2; + CHECK_MEM_ERROR( + cm, cm->clpf_blocks, + aom_malloc(((cm->frame_to_show->y_crop_width + 31) & ~31) * + ((cm->frame_to_show->y_crop_height + 31) & ~31) >> + 10)); if (!is_lossless_requested(&cpi->oxcf)) { const YV12_BUFFER_CONFIG *const frame = cm->frame_to_show; @@ -3439,18 +3428,20 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { // Apply the filter using the chosen strength cm->clpf_strength_y = strength_y - (strength_y == 4); cm->clpf_size = - fb_size_log2 ? fb_size_log2 - MAX_FB_SIZE_LOG2 + 3 : CLPF_NOSIZE; - av1_clpf_frame(frame, cpi->Source, cm, cm->clpf_size != CLPF_NOSIZE, - strength_y, 4 + cm->clpf_size, AOM_PLANE_Y, - av1_clpf_decision); + fb_size_log2 ? fb_size_log2 - get_msb(MAX_FB_SIZE) + 3 : 0; + cm->clpf_numblocks = av1_clpf_frame( + frame, cpi->Source, cm, !!cm->clpf_size, strength_y, + 4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, av1_clpf_decision); } if (strength_u) { cm->clpf_strength_u = strength_u - (strength_u == 4); - av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, AOM_PLANE_U, NULL); + av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, NULL, AOM_PLANE_U, + NULL); } if (strength_v) { cm->clpf_strength_v = strength_v - (strength_v == 4); - av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, AOM_PLANE_V, NULL); + av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, NULL, AOM_PLANE_V, + NULL); } } #endif