From 50bb32ec8703919b791b3366ecfbce78253bf6fd Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 17 Oct 2016 16:44:46 -0400 Subject: [PATCH] Splitting out 8->16 block copy code into copy_sb8_16() No change in output. Change-Id: I4f0e37a879432e2647b3debe6a2c0c670a79dd6f --- av1/common/dering.c | 80 ++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/av1/common/dering.c b/av1/common/dering.c index 4681e7647..31c53e881 100644 --- a/av1/common/dering.c +++ b/av1/common/dering.c @@ -113,6 +113,34 @@ void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src, } } +/* TODO: Optimize this function for SSE. */ +static void copy_sb8_16(AV1_COMMON *cm, int16_t *dst, int dstride, + const uint8_t *src, int src_voffset, int src_hoffset, int sstride, + int vsize, int hsize) +{ + int r, c; + (void)cm; +#if CONFIG_AOM_HIGHBITDEPTH + if (cm->use_highbitdepth) { + const uint16_t *base = &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + + src_hoffset]; + for (r = 0; r < vsize; r++) { + for (c = 0; c < hsize; c++) { + dst[r * dstride + c] = base[r*sstride + c]; + } + } + } else +#endif + { + const uint8_t *base = &src[src_voffset * sstride + src_hoffset]; + for (r = 0; r < vsize; r++) { + for (c = 0; c < hsize; c++) { + dst[r * dstride + c] = base[r*sstride + c]; + } + } + } +} + void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, int global_level) { int r, c; @@ -157,28 +185,10 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, for (sbr = 0; sbr < nvsb; sbr++) { last_sbc = -1; for (pli = 0; pli < nplanes; pli++) { -#if CONFIG_AOM_HIGHBITDEPTH - if (cm->use_highbitdepth) { - for (r = 0; r < OD_FILT_VBORDER; r++) { - for (c = 0; c < cm->mi_cols << bsize[pli]; c++) { - curr_linebuf[pli][r * stride + c] = CONVERT_TO_SHORTPTR( - xd->plane[pli].dst.buf)[((MAX_MIB_SIZE << bsize[pli]) * - (sbr + 1) - OD_FILT_VBORDER + r) * xd->plane[pli].dst.stride - + c]; - } - } - } else { -#endif - for (r = 0; r < OD_FILT_VBORDER; r++) { - for (c = 0; c < cm->mi_cols << bsize[pli]; c++) { - curr_linebuf[pli][r * stride + c] = - xd->plane[pli].dst.buf[((MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - - OD_FILT_VBORDER + r) * xd->plane[pli].dst.stride + c]; - } - } -#if CONFIG_AOM_HIGHBITDEPTH - } -#endif + copy_sb8_16(cm, curr_linebuf[pli], stride, xd->plane[pli].dst.buf, + (MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - OD_FILT_VBORDER, 0, + xd->plane[pli].dst.stride, OD_FILT_VBORDER, + cm->mi_cols << bsize[pli]); for (r = 0; r < (MAX_MIB_SIZE << bsize[pli]) + OD_FILT_VBORDER; r++) { for (c = 0; c < OD_FILT_HBORDER; c++) { colbuf[pli][r][c] = OD_DERING_VERY_LARGE; @@ -218,28 +228,10 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, coffset = sbc * MAX_MIB_SIZE << bsize[pli]; /* Copy in the pixels we need from the current superblock for deringing.*/ -#if CONFIG_AOM_HIGHBITDEPTH - if (cm->use_highbitdepth) { - for (r = 0; r < rend; r++) { - for (c = cstart; c < cend; ++c) { - src[(r + OD_FILT_VBORDER) * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] - = CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)[ - ((MAX_MIB_SIZE << bsize[pli]) * sbr + r) * - xd->plane[pli].dst.stride + c + coffset]; - } - } - } else { -#endif - for (r = 0; r < rend; r++) { - for (c = cstart; c < cend; ++c) { - src[(r + OD_FILT_VBORDER) * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] - = xd->plane[pli].dst.buf[((MAX_MIB_SIZE << bsize[pli]) * sbr - + r) * xd->plane[pli].dst.stride + c + coffset]; - } - } -#if CONFIG_AOM_HIGHBITDEPTH - } -#endif + copy_sb8_16(cm, &src[OD_FILT_VBORDER*OD_FILT_BSTRIDE + OD_FILT_HBORDER + + cstart], OD_FILT_BSTRIDE, xd->plane[pli].dst.buf, + (MAX_MIB_SIZE << bsize[pli]) * sbr, coffset + cstart, + xd->plane[pli].dst.stride, rend, cend-cstart); if (sbc == nhsb - 1) { /* On the last superblock column, fill in the right border with OD_DERING_VERY_LARGE to avoid filtering with the outside. */ -- 2.40.0