From: Alex Converse Date: Fri, 20 Mar 2015 21:51:59 +0000 (-0700) Subject: Refactor fast loop filter code to handle 444. X-Git-Tag: v1.5.0~834 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a1e20ec58f3b562af536be80f8dd061878cbaffd;p=libvpx Refactor fast loop filter code to handle 444. Change-Id: I921b1ebabdf617049f8fa26fbe462c3ff115c1ce --- diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 676382dc8..e464d453d 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -293,7 +293,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { } } -static void filter_selectively_vert_row2(PLANE_TYPE plane_type, +static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s, int pitch, unsigned int mask_16x16_l, unsigned int mask_8x8_l, @@ -301,9 +301,9 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n, const uint8_t *lfl) { - const int mask_shift = plane_type ? 4 : 8; - const int mask_cutoff = plane_type ? 0xf : 0xff; - const int lfl_forward = plane_type ? 4 : 8; + const int mask_shift = subsampling_factor ? 4 : 8; + const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int lfl_forward = subsampling_factor ? 4 : 8; unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; @@ -393,7 +393,7 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, } #if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, +static void highbd_filter_selectively_vert_row2(int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l, unsigned int mask_8x8_l, @@ -401,9 +401,9 @@ static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) { - const int mask_shift = plane_type ? 4 : 8; - const int mask_cutoff = plane_type ? 0xf : 0xff; - const int lfl_forward = plane_type ? 4 : 8; + const int mask_shift = subsampling_factor ? 4 : 8; + const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int lfl_forward = subsampling_factor ? 4 : 8; unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; @@ -1326,248 +1326,203 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, } } -void vp9_filter_block_plane(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { +void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; - uint8_t* const dst0 = dst->buf; - int r, c; + uint8_t *const dst0 = dst->buf; + int r; + uint64_t mask_16x16 = lfm->left_y[TX_16X16]; + uint64_t mask_8x8 = lfm->left_y[TX_8X8]; + uint64_t mask_4x4 = lfm->left_y[TX_4X4]; + uint64_t mask_4x4_int = lfm->int_4x4_y; + + assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + unsigned int mask_16x16_l = mask_16x16 & 0xffff; + unsigned int mask_8x8_l = mask_8x8 & 0xffff; + unsigned int mask_4x4_l = mask_4x4 & 0xffff; + unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; + +// Disable filtering on the leftmost column. +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2( + plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_y[r << 3], (int)cm->bit_depth); + } else { + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); + } +#else + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 16 * dst->stride; + mask_16x16 >>= 16; + mask_8x8 >>= 16; + mask_4x4 >>= 16; + mask_4x4_int >>= 16; + } - if (!plane->plane_type) { - uint64_t mask_16x16 = lfm->left_y[TX_16X16]; - uint64_t mask_8x8 = lfm->left_y[TX_8X8]; - uint64_t mask_4x4 = lfm->left_y[TX_4X4]; - uint64_t mask_4x4_int = lfm->int_4x4_y; + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_y[TX_16X16]; + mask_8x8 = lfm->above_y[TX_8X8]; + mask_4x4 = lfm->above_y[TX_4X4]; + mask_4x4_int = lfm->int_4x4_y; - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - unsigned int mask_16x16_l = mask_16x16 & 0xffff; - unsigned int mask_8x8_l = mask_8x8 & 0xffff; - unsigned int mask_4x4_l = mask_4x4 & 0xffff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xff; + mask_8x8_r = mask_8x8 & 0xff; + mask_4x4_r = mask_4x4 & 0xff; + } - // Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->plane_type, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, &lfm->lfl_y[r << 3], - (int)cm->bit_depth); - } else { - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_y[r << 3]); - } + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz( + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], + (int)cm->bit_depth); + } else { + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, + &lfm->lfl_y[r << 3]); + } #else - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, &lfm->lfl_y[r << 3]); + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, + &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 16 * dst->stride; - mask_16x16 >>= 16; - mask_8x8 >>= 16; - mask_4x4 >>= 16; - mask_4x4_int >>= 16; - } - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_y[TX_16X16]; - mask_8x8 = lfm->above_y[TX_8X8]; - mask_4x4 = lfm->above_y[TX_4X4]; - mask_4x4_int = lfm->int_4x4_y; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xff; - mask_8x8_r = mask_8x8 & 0xff; - mask_4x4_r = mask_4x4 & 0xff; + dst->buf += 8 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } +} + +void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r, c; + + uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; + uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; + uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; + uint16_t mask_4x4_int = lfm->int_4x4_uv; + + assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { + if (plane->plane_type == 1) { + for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { + lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; + lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; } + } + + { + unsigned int mask_16x16_l = mask_16x16 & 0xff; + unsigned int mask_8x8_l = mask_8x8 & 0xff; + unsigned int mask_4x4_l = mask_4x4 & 0xff; + unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; +// Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int & 0xff, - &cm->lf_info, - &lfm->lfl_y[r << 3], - (int)cm->bit_depth); + highbd_filter_selectively_vert_row2( + plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_uv[r << 1], (int)cm->bit_depth); } else { - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int & 0xff, - &cm->lf_info, - &lfm->lfl_y[r << 3]); + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_uv[r << 1]); } #else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int & 0xff, - &cm->lf_info, - &lfm->lfl_y[r << 3]); + filter_selectively_vert_row2( + plane->subsampling_x, dst->buf, dst->stride, + mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; + dst->buf += 16 * dst->stride; mask_16x16 >>= 8; mask_8x8 >>= 8; mask_4x4 >>= 8; mask_4x4_int >>= 8; } - } else { - uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; - uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; - uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; - uint16_t mask_4x4_int = lfm->int_4x4_uv; - - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { - if (plane->plane_type == 1) { - for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { - lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; - lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + - (c << 1)]; - } - } - - { - unsigned int mask_16x16_l = mask_16x16 & 0xff; - unsigned int mask_8x8_l = mask_8x8 & 0xff; - unsigned int mask_4x4_l = mask_4x4 & 0xff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; + } - // Disable filtering on the leftmost column. -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->plane_type, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_uv[r << 1], - (int)cm->bit_depth); - } else { - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_uv[r << 1]); - } -#else - filter_selectively_vert_row2(plane->plane_type, - dst->buf, dst->stride, - mask_16x16_l, - mask_8x8_l, - mask_4x4_l, - mask_4x4_int_l, - &cm->lf_info, - &lfm->lfl_uv[r << 1]); -#endif // CONFIG_VP9_HIGHBITDEPTH + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_uv[TX_16X16]; + mask_8x8 = lfm->above_uv[TX_8X8]; + mask_4x4 = lfm->above_uv[TX_4X4]; + mask_4x4_int = lfm->int_4x4_uv; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = + skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf); + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; - } + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xf; + mask_8x8_r = mask_8x8 & 0xf; + mask_4x4_r = mask_4x4 & 0xf; } - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_uv[TX_16X16]; - mask_8x8 = lfm->above_uv[TX_8X8]; - mask_4x4 = lfm->above_uv[TX_4X4]; - mask_4x4_int = lfm->int_4x4_uv; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; - const unsigned int mask_4x4_int_r = skip_border_4x4_r ? - 0 : (mask_4x4_int & 0xf); - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xf; - mask_8x8_r = mask_8x8 & 0xf; - mask_4x4_r = mask_4x4 & 0xf; - } - #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, - &lfm->lfl_uv[r << 1], - (int)cm->bit_depth); - } else { - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, - &lfm->lfl_uv[r << 1]); - } -#else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + &lfm->lfl_uv[r << 1], (int)cm->bit_depth); + } else { + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r << 1]); + } +#else + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + &lfm->lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - mask_16x16 >>= 4; - mask_8x8 >>= 4; - mask_4x4 >>= 4; - mask_4x4_int >>= 4; - } + dst->buf += 8 * dst->stride; + mask_16x16 >>= 4; + mask_8x8 >>= 4; + mask_4x4 >>= 4; + mask_4x4_int >>= 4; } } @@ -1576,11 +1531,19 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; - const int use_420 = y_only || (planes[1].subsampling_y == 1 && - planes[1].subsampling_x == 1); + enum lf_path path; LOOP_FILTER_MASK lfm; int mi_row, mi_col; + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; + for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { MODE_INFO *mi = cm->mi + mi_row * cm->mi_stride; @@ -1590,16 +1553,23 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. - if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); - - for (plane = 0; plane < num_planes; ++plane) { - if (use_420) - vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); - else - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + &lfm); + + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } } } } diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index 6d7cabf7c..ab3b179e9 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -29,6 +29,12 @@ extern "C" { #define MAX_REF_LF_DELTAS 4 #define MAX_MODE_LF_DELTAS 2 +enum lf_path { + LF_PATH_420, + LF_PATH_444, + LF_PATH_SLOW, +}; + struct loopfilter { int filter_level; @@ -92,10 +98,15 @@ void vp9_setup_mask(struct VP9Common *const cm, MODE_INFO *mi_8x8, const int mode_info_stride, LOOP_FILTER_MASK *lfm); -void vp9_filter_block_plane(struct VP9Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); +void vp9_filter_block_plane_ss00(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss11(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); void vp9_filter_block_plane_non420(struct VP9Common *cm, struct macroblockd_plane *plane, diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c index 7a3871c41..830a5177f 100644 --- a/vp9/common/vp9_thread_common.c +++ b/vp9/common/vp9_thread_common.c @@ -13,6 +13,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_loopfilter.h" #if CONFIG_MULTITHREAD static INLINE void mutex_lock(pthread_mutex_t *const mutex) { @@ -92,10 +93,17 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, int start, int stop, int y_only, VP9LfSync *const lf_sync) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; - const int use_420 = y_only || (planes[1].subsampling_y == 1 && - planes[1].subsampling_x == 1); const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; int mi_row, mi_col; + enum lf_path path; + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; for (mi_row = start; mi_row < stop; mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { @@ -112,16 +120,23 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. - if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); - - for (plane = 0; plane < num_planes; ++plane) { - if (use_420) - vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); - else - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + &lfm); + + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } } sync_write(lf_sync, r, c, sb_cols);