From 253c001f8f43d65865f1c7b0a38f796fab4e194d Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 15 Aug 2016 10:27:19 -0700 Subject: [PATCH] Port dering experiment from aom Manually cherry-picked: 1579133 Use OD_DIVU for small divisions in temporal_filter. 0312229 Replace divides by small values with multiplies. 9c48eec Removing divisions from od_dir_find8() 0950ed8 Merge "Port active map / cyclic refresh fixes to vp10." efefdad Port active map / cyclic refresh fixes to vp10. 1eaf748 Port switch to 9-bit rate cost to aom. 0b1606e Only build deringing code when --enable-dering. e2511e1 Deringing cleanup: don't hardcode the number of levels 8fe5c5d Rename dering_in to od_dering_in to sync with Daala 4eb1380 Makes second filters for 45-degree directions horizontal 7f4c3f5 Removes the superblock variance contribution to the threshold 3dc56f9 Simplifying arithmetic by using multiply+shift cf2aaba Return 0 explicitly for OD_ILOG(0). 49ca22a Use the Daala implementation of OD_ILOG(). 8518724 Fix compiler warning in od_dering.c. 485d6a6 Prevent multiple inclusion of odintrin.h. 51b7a99 Adds the Daala deringing filter as experimental Note that a few of the changes were already in the libvpx codebase. 
Change-Id: I1c32ee7694e5ad22c98b06ff97737cd792cd88ae --- configure | 1 + test/acm_random.h | 5 + test/divu_small_test.cc | 52 +++++ test/test.mk | 1 + vp10/common/blockd.h | 2 + vp10/common/dering.c | 153 +++++++++++++ vp10/common/dering.h | 33 +++ vp10/common/od_dering.c | 352 ++++++++++++++++++++++++++++++ vp10/common/od_dering.h | 86 ++++++++ vp10/common/odintrin.c | 382 +++++++++++++++++++++++++++++++++ vp10/common/odintrin.h | 47 ++++ vp10/common/onyxc_int.h | 3 + vp10/common/vp10_txfm.h | 2 +- vp10/decoder/decodeframe.c | 28 +++ vp10/encoder/bitstream.c | 21 ++ vp10/encoder/encoder.c | 13 +- vp10/encoder/pickdering.c | 180 ++++++++++++++++ vp10/encoder/temporal_filter.c | 67 ++---- vp10/encoder/temporal_filter.h | 1 - vp10/vp10_common.mk | 8 + vp10/vp10cx.mk | 4 +- 21 files changed, 1389 insertions(+), 52 deletions(-) create mode 100644 test/divu_small_test.cc create mode 100644 vp10/common/dering.c create mode 100644 vp10/common/dering.h create mode 100644 vp10/common/od_dering.c create mode 100644 vp10/common/od_dering.h create mode 100644 vp10/common/odintrin.c create mode 100644 vp10/common/odintrin.h create mode 100644 vp10/encoder/pickdering.c diff --git a/configure b/configure index f3fcd3452..633e3edba 100755 --- a/configure +++ b/configure @@ -253,6 +253,7 @@ EXPERIMENT_LIST=" fp_mb_stats emulate_hardware clpf + dering var_tx rect_tx ref_mv diff --git a/test/acm_random.h b/test/acm_random.h index c2f6b0e41..eae2cf98b 100644 --- a/test/acm_random.h +++ b/test/acm_random.h @@ -24,6 +24,11 @@ class ACMRandom { explicit ACMRandom(int seed) : random_(seed) {} void Reset(int seed) { random_.Reseed(seed); } + + uint32_t Rand31(void) { + return random_.Generate(testing::internal::Random::kMaxRange); + } + uint16_t Rand16(void) { const uint32_t value = random_.Generate(testing::internal::Random::kMaxRange); diff --git a/test/divu_small_test.cc b/test/divu_small_test.cc new file mode 100644 index 000000000..a7cfbf7e1 --- /dev/null +++ b/test/divu_small_test.cc 
@@ -0,0 +1,52 @@ +/*Daala video codec +Copyright (c) 2013 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#include + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "vp10/common/odintrin.h" + +using libvpx_test::ACMRandom; + +TEST(Daala, TestDIVUuptoMAX) { + for (int d = 1; d <= OD_DIVU_DMAX; d++) { + for (uint32_t x = 1; x <= 1000000; x++) { + GTEST_ASSERT_EQ(x/d, OD_DIVU_SMALL(x, d)) << "x=" << x << " d=" << d << + " x/d=" << (x/d) << " != " << OD_DIVU_SMALL(x, d); + } + } +} + +TEST(Daala, TestDIVUrandI31) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + for (int d = 1; d < OD_DIVU_DMAX; d++) { + for (int i = 0; i < 1000000; i++) { + uint32_t x = rnd.Rand31(); + 
GTEST_ASSERT_EQ(x/d, OD_DIVU_SMALL(x, d)) << "x=" << x << " d=" << d << + " x/d=" << (x/d) << " != " << OD_DIVU_SMALL(x, d); + } + } +} diff --git a/test/test.mk b/test/test.mk index bc8424c9c..feb3f4927 100644 --- a/test/test.mk +++ b/test/test.mk @@ -102,6 +102,7 @@ LIBVPX_TEST_SRCS-yes += partial_idct_test.cc LIBVPX_TEST_SRCS-yes += superframe_test.cc LIBVPX_TEST_SRCS-yes += tile_independence_test.cc LIBVPX_TEST_SRCS-yes += boolcoder_test.cc +LIBVPX_TEST_SRCS-yes += divu_small_test.cc #LIBVPX_TEST_SRCS-yes += encoder_parms_get_to_decoder.cc endif diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 9c04812a8..6abc29082 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -239,6 +239,8 @@ typedef struct { int dq_off_index; int send_dq_bit; #endif // CONFIG_NEW_QUANT + /* deringing gain *per-superblock* */ + int8_t dering_gain; } MB_MODE_INFO; typedef struct MODE_INFO { diff --git a/vp10/common/dering.c b/vp10/common/dering.c new file mode 100644 index 000000000..f657c83fd --- /dev/null +++ b/vp10/common/dering.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include + +#include "./vpx_scale_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vp10/common/dering.h" +#include "vp10/common/onyxc_int.h" +#include "vp10/common/reconinter.h" +#include "vp10/common/od_dering.h" + + +int compute_level_from_index(int global_level, int gi) { + static const int dering_gains[DERING_REFINEMENT_LEVELS] = {0, 11, 16, 22}; + int level; + if (global_level == 0) return 0; + level = (global_level*dering_gains[gi] + 8) >> 4; + return clamp(level, gi, MAX_DERING_LEVEL-1); +} + +int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col) { + int r, c; + int maxc, maxr; + int skip = 1; + maxc = cm->mi_cols - mi_col; + maxr = cm->mi_rows - mi_row; + if (maxr > MI_BLOCK_SIZE) maxr = MI_BLOCK_SIZE; + if (maxc > MI_BLOCK_SIZE) maxc = MI_BLOCK_SIZE; + for (r = 0; r < maxr; r++) { + for (c = 0; c < maxc; c++) { + skip = skip && + cm->mi_grid_visible[(mi_row + r)*cm->mi_stride + mi_col + c]-> + mbmi.skip; + } + } + return skip; +} + +void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm, + MACROBLOCKD *xd, int global_level) { + int r, c; + int sbr, sbc; + int nhsb, nvsb; + od_dering_in *src[3]; + unsigned char *bskip; + int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = {{0}}; + int stride; + int bsize[3]; + int dec[3]; + int pli; + int coeff_shift = VPXMAX(cm->bit_depth - 8, 0); + nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE; + nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE; + bskip = vpx_malloc(sizeof(*bskip)*cm->mi_rows*cm->mi_cols); + vp10_setup_dst_planes(xd->plane, frame, 0, 0); + for (pli = 0; pli < 3; pli++) { + dec[pli] = xd->plane[pli].subsampling_x; + bsize[pli] = 8 >> dec[pli]; + } + stride = bsize[0]*cm->mi_cols; + for (pli = 0; pli < 3; pli++) { + src[pli] = vpx_malloc(sizeof(*src)*cm->mi_rows*cm->mi_cols*64); + for (r = 0; r < bsize[pli]*cm->mi_rows; ++r) { + for (c = 0; c < bsize[pli]*cm->mi_cols; ++c) { +#if CONFIG_VPX_HIGHBITDEPTH + if (cm->use_highbitdepth) { + src[pli][r * 
stride + c] = + CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf) + [r * xd->plane[pli].dst.stride + c]; + } else { +#endif + src[pli][r * stride + c] = + xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c]; +#if CONFIG_VPX_HIGHBITDEPTH + } +#endif + } + } + } + for (r = 0; r < cm->mi_rows; ++r) { + for (c = 0; c < cm->mi_cols; ++c) { + const MB_MODE_INFO *mbmi = + &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi; + bskip[r * cm->mi_cols + c] = mbmi->skip; + } + } + for (sbr = 0; sbr < nvsb; sbr++) { + for (sbc = 0; sbc < nhsb; sbc++) { + int level; + int nhb, nvb; + nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE*sbc); + nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE*sbr); + for (pli = 0; pli < 3; pli++) { + int16_t dst[MI_BLOCK_SIZE*MI_BLOCK_SIZE*8*8]; + int threshold; +#if DERING_REFINEMENT + level = compute_level_from_index( + global_level, + cm->mi_grid_visible[MI_BLOCK_SIZE*sbr*cm->mi_stride + + MI_BLOCK_SIZE*sbc]->mbmi.dering_gain); +#else + level = global_level; +#endif + /* FIXME: This is a temporary hack that uses more conservative + deringing for chroma. 
*/ + if (pli) level = (level*5 + 4) >> 3; + if (sb_all_skip(cm, sbr*MI_BLOCK_SIZE, sbc*MI_BLOCK_SIZE)) level = 0; + threshold = level << coeff_shift; + od_dering( + &OD_DERING_VTBL_C, + dst, + MI_BLOCK_SIZE*bsize[pli], + &src[pli][sbr*stride*bsize[pli]*MI_BLOCK_SIZE + + sbc*bsize[pli]*MI_BLOCK_SIZE], + stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli, + &bskip[MI_BLOCK_SIZE*sbr*cm->mi_cols + MI_BLOCK_SIZE*sbc], + cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift); + for (r = 0; r < bsize[pli]*nvb; ++r) { + for (c = 0; c < bsize[pli]*nhb; ++c) { +#if CONFIG_VPX_HIGHBITDEPTH + if (cm->use_highbitdepth) { + CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf) + [xd->plane[pli].dst.stride*(bsize[pli]*MI_BLOCK_SIZE*sbr + r) + + sbc*bsize[pli]*MI_BLOCK_SIZE + c] = + dst[r * MI_BLOCK_SIZE * bsize[pli] + c]; + } else { +#endif + xd->plane[pli].dst.buf[xd->plane[pli].dst.stride* + (bsize[pli]*MI_BLOCK_SIZE*sbr + r) + + sbc*bsize[pli]*MI_BLOCK_SIZE + c] = + dst[r * MI_BLOCK_SIZE * bsize[pli] + c]; +#if CONFIG_VPX_HIGHBITDEPTH + } +#endif + } + } + } + } + } + for (pli = 0; pli < 3; pli++) { + vpx_free(src[pli]); + } + vpx_free(bskip); +} diff --git a/vp10/common/dering.h b/vp10/common/dering.h new file mode 100644 index 000000000..946366a67 --- /dev/null +++ b/vp10/common/dering.h @@ -0,0 +1,33 @@ +#ifndef VP10_COMMON_DERING_H_ +#define VP10_COMMON_DERING_H_ + +#include "vp10/common/od_dering.h" +#include "vp10/common/onyxc_int.h" +#include "vpx/vpx_integer.h" +#include "./vpx_config.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DERING_LEVEL_BITS 6 +#define MAX_DERING_LEVEL (1 << DERING_LEVEL_BITS) + +#define DERING_REFINEMENT 1 +#define DERING_REFINEMENT_BITS 2 +#define DERING_REFINEMENT_LEVELS 4 + +int compute_level_from_index(int global_level, int gi); +int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col); +void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm, + MACROBLOCKD *xd, int 
global_level); + +int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, + VP10_COMMON *cm, + MACROBLOCKD *xd); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // VP10_COMMON_DERING_H_ diff --git a/vp10/common/od_dering.c b/vp10/common/od_dering.c new file mode 100644 index 000000000..af89b80db --- /dev/null +++ b/vp10/common/od_dering.c @@ -0,0 +1,352 @@ +/*Daala video codec +Copyright (c) 2014-2016 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include "dering.h" + +const od_dering_opt_vtbl OD_DERING_VTBL_C = { + {od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c}, + {od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c} +}; + +/* Generated from gen_filter_tables.c. */ +const int OD_DIRECTION_OFFSETS_TABLE[8][3] = { + {-1*OD_FILT_BSTRIDE + 1, -2*OD_FILT_BSTRIDE + 2, -3*OD_FILT_BSTRIDE + 3 }, + { 0*OD_FILT_BSTRIDE + 1, -1*OD_FILT_BSTRIDE + 2, -1*OD_FILT_BSTRIDE + 3 }, + { 0*OD_FILT_BSTRIDE + 1, 0*OD_FILT_BSTRIDE + 2, 0*OD_FILT_BSTRIDE + 3 }, + { 0*OD_FILT_BSTRIDE + 1, 1*OD_FILT_BSTRIDE + 2, 1*OD_FILT_BSTRIDE + 3 }, + { 1*OD_FILT_BSTRIDE + 1, 2*OD_FILT_BSTRIDE + 2, 3*OD_FILT_BSTRIDE + 3 }, + { 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE + 1, 3*OD_FILT_BSTRIDE + 1 }, + { 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE + 0, 3*OD_FILT_BSTRIDE + 0 }, + { 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE - 1, 3*OD_FILT_BSTRIDE - 1 }, +}; + +const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = { + 0, 0.5, 0.707, 1, 1.41, 2 +}; + +/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on. + The search minimizes the weighted variance along all the lines in a + particular direction, i.e. the squared error between the input and a + "predicted" block where each pixel is replaced by the average along a line + in a particular direction. 
Since each direction have the same sum(x^2) term, + that term is never computed. See Section 2, step 2, of: + http://jmvalin.ca/notes/intra_paint.pdf */ +static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var, + int coeff_shift) { + int i; + int32_t cost[8] = {0}; + int partial[8][15] = {{0}}; + int32_t best_cost = 0; + int best_dir = 0; + /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n. + The output is then 840 times larger, but we don't care for finding + the max. */ + static const int div_table[] = {0, 840, 420, 280, 210, 168, 140, 120, 105}; + for (i = 0; i < 8; i++) { + int j; + for (j = 0; j < 8; j++) { + int x; + /* We subtract 128 here to reduce the maximum range of the squared + partial sums. */ + x = (img[i*stride + j] >> coeff_shift) - 128; + partial[0][i + j] += x; + partial[1][i + j/2] += x; + partial[2][i] += x; + partial[3][3 + i - j/2] += x; + partial[4][7 + i - j] += x; + partial[5][3 - i/2 + j] += x; + partial[6][j] += x; + partial[7][i/2 + j] += x; + } + } + for (i = 0; i < 8; i++) { + cost[2] += partial[2][i]*partial[2][i]; + cost[6] += partial[6][i]*partial[6][i]; + } + cost[2] *= div_table[8]; + cost[6] *= div_table[8]; + for (i = 0; i < 7; i++) { + cost[0] += (partial[0][i]*partial[0][i] + + partial[0][14 - i]*partial[0][14 - i])*div_table[i + 1]; + cost[4] += (partial[4][i]*partial[4][i] + + partial[4][14 - i]*partial[4][14 - i])*div_table[i + 1]; + } + cost[0] += partial[0][7]*partial[0][7]*div_table[8]; + cost[4] += partial[4][7]*partial[4][7]*div_table[8]; + for (i = 1; i < 8; i += 2) { + int j; + for (j = 0; j < 4 + 1; j++) { + cost[i] += partial[i][3 + j]*partial[i][3 + j]; + } + cost[i] *= div_table[8]; + for (j = 0; j < 4 - 1; j++) { + cost[i] += (partial[i][j]*partial[i][j] + + partial[i][10 - j]*partial[i][10 - j])*div_table[2*j + 2]; + } + } + for (i = 0; i < 8; i++) { + if (cost[i] > best_cost) { + best_cost = cost[i]; + best_dir = i; + } + } + /* Difference between the optimal variance and 
the variance along the + orthogonal direction. Again, the sum(x^2) terms cancel out. */ + *var = best_cost - cost[(best_dir + 4) & 7]; + /* We'd normally divide by 840, but dividing by 1024 is close enough + for what we're going to do with this. */ + *var >>= 10; + return best_dir; +} + +#define OD_DERING_VERY_LARGE (30000) +#define OD_DERING_INBUF_SIZE ((OD_BSIZE_MAX + 2*OD_FILT_BORDER)*\ + (OD_BSIZE_MAX + 2*OD_FILT_BORDER)) + +/* Smooth in the direction detected. */ +void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in, + int ln, int threshold, int dir) { + int i; + int j; + int k; + static const int taps[3] = {3, 2, 2}; + for (i = 0; i < 1 << ln; i++) { + for (j = 0; j < 1 << ln; j++) { + int16_t sum; + int16_t xx; + int16_t yy; + xx = in[i*OD_FILT_BSTRIDE + j]; + sum= 0; + for (k = 0; k < 3; k++) { + int16_t p0; + int16_t p1; + p0 = in[i*OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] + - xx; + p1 = in[i*OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] + - xx; + if (abs(p0) < threshold) sum += taps[k]*p0; + if (abs(p1) < threshold) sum += taps[k]*p1; + } + yy = xx + ((sum + 8) >> 4); + y[i*ystride + j] = yy; + } + } +} + +void od_filter_dering_direction_4x4_c(int16_t *y, int ystride, + const int16_t *in, int threshold, int dir) { + od_filter_dering_direction_c(y, ystride, in, 2, threshold, dir); +} + +void od_filter_dering_direction_8x8_c(int16_t *y, int ystride, + const int16_t *in, int threshold, int dir) { + od_filter_dering_direction_c(y, ystride, in, 3, threshold, dir); +} + +/* Smooth in the direction orthogonal to what was detected. 
*/ +void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in, + const od_dering_in *x, int xstride, int ln, int threshold, int dir) { + int i; + int j; + int offset; + if (dir > 0 && dir < 4) offset = OD_FILT_BSTRIDE; + else offset = 1; + for (i = 0; i < 1 << ln; i++) { + for (j = 0; j < 1 << ln; j++) { + int16_t athresh; + int16_t yy; + int16_t sum; + int16_t p; + /* Deringing orthogonal to the direction uses a tighter threshold + because we want to be conservative. We've presumably already + achieved some deringing, so the amount of change is expected + to be low. Also, since we might be filtering across an edge, we + want to make sure not to blur it. That being said, we might want + to be a little bit more aggressive on pure horizontal/vertical + since the ringing there tends to be directional, so it doesn't + get removed by the directional filtering. */ + athresh = OD_MINI(threshold, threshold/3 + + abs(in[i*OD_FILT_BSTRIDE + j] - x[i*xstride + j])); + yy = in[i*OD_FILT_BSTRIDE + j]; + sum = 0; + p = in[i*OD_FILT_BSTRIDE + j + offset] - yy; + if (abs(p) < athresh) sum += p; + p = in[i*OD_FILT_BSTRIDE + j - offset] - yy; + if (abs(p) < athresh) sum += p; + p = in[i*OD_FILT_BSTRIDE + j + 2*offset] - yy; + if (abs(p) < athresh) sum += p; + p = in[i*OD_FILT_BSTRIDE + j - 2*offset] - yy; + if (abs(p) < athresh) sum += p; + y[i*ystride + j] = yy + ((3*sum + 8) >> 4); + } + } +} + +void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride, + const int16_t *in, const od_dering_in *x, int xstride, int threshold, + int dir) { + od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 2, threshold, dir); +} + +void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride, + const int16_t *in, const od_dering_in *x, int xstride, int threshold, + int dir) { + od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 3, threshold, dir); +} + +/* This table approximates x^0.16 with the index being log2(x). It is clamped + to [-.5, 3]. 
The table is computed as: + round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */ +static const int16_t OD_THRESH_TABLE_Q8[18] = { + 128, 134, 150, 168, 188, 210, 234, 262, + 292, 327, 365, 408, 455, 509, 569, 635, + 710, 768, +}; + +/* Compute deringing filter threshold for each 8x8 block based on the + directional variance difference. A high variance difference means that we + have a highly directional pattern (e.g. a high contrast edge), so we can + apply more deringing. A low variance means that we either have a low + contrast edge, or a non-directional texture, so we want to be careful not + to blur. */ +static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], + int threshold, int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], + int nhb, int nvb) { + int bx; + int by; + for (by = 0; by < nvb; by++) { + for (bx = 0; bx < nhb; bx++) { + int v1; + /* We use the variance of 8x8 blocks to adjust the threshold. */ + v1 = OD_MINI(32767, var[by][bx] >> 6); + thresh[by][bx] = (threshold*OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8; + } + } +} + +void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride, + const od_dering_in *x, int xstride, int nhb, int nvb, int sbx, int sby, + int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], + int pli, unsigned char *bskip, int skip_stride, int threshold, int overlap, + int coeff_shift) { + int i; + int j; + int bx; + int by; + int16_t inbuf[OD_DERING_INBUF_SIZE]; + int16_t *in; + int bsize; + int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS]; + int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS]; + bsize = 3 - xdec; + in = inbuf + OD_FILT_BORDER*OD_FILT_BSTRIDE + OD_FILT_BORDER; + /* We avoid filtering the pixels for which some of the pixels to average + are outside the frame. We could change the filter instead, but it would + add special cases for any future vectorization. 
*/ + for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE; + for (i = -OD_FILT_BORDER*(sby != 0); i < (nvb << bsize) + + OD_FILT_BORDER*(sby != nvsb - 1); i++) { + for (j = -OD_FILT_BORDER*(sbx != 0); j < (nhb << bsize) + + OD_FILT_BORDER*(sbx != nhsb - 1); j++) { + in[i*OD_FILT_BSTRIDE + j] = x[i*xstride + j]; + } + } + if (pli == 0) { + for (by = 0; by < nvb; by++) { + for (bx = 0; bx < nhb; bx++) { + dir[by][bx] = od_dir_find8(&x[8*by*xstride + 8*bx], xstride, + &var[by][bx], coeff_shift); + } + } + od_compute_thresh(thresh, threshold, var, nhb, nvb); + } + else { + for (by = 0; by < nvb; by++) { + for (bx = 0; bx < nhb; bx++) { + thresh[by][bx] = threshold; + } + } + } + for (by = 0; by < nvb; by++) { + for (bx = 0; bx < nhb; bx++) { + int skip; +# if defined(DAALA_ODINTRIN) + int xstart; + int ystart; + int xend; + int yend; + xstart = ystart = 0; + xend = yend = (2 >> xdec); + if (overlap) { + xstart -= (sbx != 0); + ystart -= (sby != 0); + xend += (sbx != nhsb - 1); + yend += (sby != nvsb - 1); + } + skip = 1; + /* We look at whether the current block and its 4x4 surrounding (due to + lapping) are skipped to avoid filtering the same content multiple + times. 
*/ + for (i = ystart; i < yend; i++) { + for (j = xstart; j < xend; j++) { + skip = skip && bskip[((by << 1 >> xdec) + i)*skip_stride + + (bx << 1 >> xdec) + j]; + } + } +#else + (void)overlap; + skip = bskip[by*skip_stride + bx]; +#endif + if (skip) thresh[by][bx] = 0; + } + } + for (by = 0; by < nvb; by++) { + for (bx = 0; bx < nhb; bx++) { + (vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])( + &y[(by*ystride << bsize) + (bx << bsize)], ystride, + &in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)], + thresh[by][bx], dir[by][bx]); + } + } + for (i = 0; i < nvb << bsize; i++) { + for (j = 0; j < nhb << bsize; j++) { + in[i*OD_FILT_BSTRIDE + j] = y[i*ystride + j]; + } + } + for (by = 0; by < nvb; by++) { + for (bx = 0; bx < nhb; bx++) { + (vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])( + &y[(by*ystride << bsize) + (bx << bsize)], ystride, + &in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)], + &x[(by*xstride << bsize) + (bx << bsize)], xstride, + thresh[by][bx], dir[by][bx]); + } + } +} diff --git a/vp10/common/od_dering.h b/vp10/common/od_dering.h new file mode 100644 index 000000000..24127dd97 --- /dev/null +++ b/vp10/common/od_dering.h @@ -0,0 +1,86 @@ +/*Daala video codec +Copyright (c) 2003-2010 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#if !defined(_dering_H) +# define _dering_H (1) + +# include "odintrin.h" + +# if defined(DAALA_ODINTRIN) +# include "filter.h" +typedef int16_t od_dering_in; +# endif + +#define OD_DERINGSIZES (2) + +#define OD_DERING_NO_CHECK_OVERLAP (0) +#define OD_DERING_CHECK_OVERLAP (1) + +#define OD_DERING_LEVELS (6) +extern const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS]; + +#define OD_DERING_NBLOCKS (OD_BSIZE_MAX/8) + +#define OD_FILT_BORDER (3) +#define OD_FILT_BSTRIDE (OD_BSIZE_MAX + 2*OD_FILT_BORDER) + +extern const int OD_DIRECTION_OFFSETS_TABLE[8][3]; + +typedef void (*od_filter_dering_direction_func)(int16_t *y, int ystride, + const int16_t *in, int threshold, int dir); +typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride, + const int16_t *in, const od_dering_in *x, int xstride, int threshold, + int dir); + +struct od_dering_opt_vtbl { + od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES]; + od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES]; +}; +typedef struct od_dering_opt_vtbl od_dering_opt_vtbl; + + +void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride, + const od_dering_in *x, int xstride, int nvb, int nhb, int sbx, int 
sby, + int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], + int pli, unsigned char *bskip, int skip_stride, int threshold, int overlap, + int coeff_shift); +void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in, + int ln, int threshold, int dir); +void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in, + const od_dering_in *x, int xstride, int ln, int threshold, int dir); + +extern const od_dering_opt_vtbl OD_DERING_VTBL_C; + +void od_filter_dering_direction_4x4_c(int16_t *y, int ystride, + const int16_t *in, int threshold, int dir); +void od_filter_dering_direction_8x8_c(int16_t *y, int ystride, + const int16_t *in, int threshold, int dir); +void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride, + const int16_t *in, const od_dering_in *x, int xstride, int threshold, + int dir); +void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride, + const int16_t *in, const od_dering_in *x, int xstride, int threshold, + int dir); + +#endif diff --git a/vp10/common/odintrin.c b/vp10/common/odintrin.c new file mode 100644 index 000000000..ca9a5fc4b --- /dev/null +++ b/vp10/common/odintrin.c @@ -0,0 +1,382 @@ +/*Daala video codec +Copyright (c) 2006-2010 Daala project contributors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ + +#include "vp10/common/odintrin.h" + +/*Constants for use with OD_DIVU_SMALL(). + See \cite{Rob05} for details on computing these constants. + @INPROCEEDINGS{Rob05, + author="Arch D. Robison", + title="{N}-bit Unsigned Division via {N}-bit Multiply-Add", + booktitle="Proc. 
of the 17th IEEE Symposium on Computer Arithmetic + (ARITH'05)", + pages="131--139", + address="Cape Cod, MA", + month=Jun, + year=2005 + }*/ +uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2] = { + {0xFFFFFFFF, 0xFFFFFFFF}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xAAAAAAAB, 0}, + {0xFFFFFFFF, 0xFFFFFFFF}, {0xCCCCCCCD, 0}, {0xAAAAAAAB, 0}, + {0x92492492, 0x92492492}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xE38E38E4, 0}, + {0xCCCCCCCD, 0}, {0xBA2E8BA3, 0}, {0xAAAAAAAB, 0}, + {0x9D89D89E, 0}, {0x92492492, 0x92492492}, {0x88888889, 0}, + {0xFFFFFFFF, 0xFFFFFFFF}, {0xF0F0F0F1, 0}, {0xE38E38E4, 0}, + {0xD79435E5, 0xD79435E5}, {0xCCCCCCCD, 0}, {0xC30C30C3, 0xC30C30C3}, + {0xBA2E8BA3, 0}, {0xB21642C9, 0}, {0xAAAAAAAB, 0}, + {0xA3D70A3E, 0}, {0x9D89D89E, 0}, {0x97B425ED, 0x97B425ED}, + {0x92492492, 0x92492492}, {0x8D3DCB09, 0}, {0x88888889, 0}, + {0x84210842, 0x84210842}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xF83E0F84, 0}, + {0xF0F0F0F1, 0}, {0xEA0EA0EA, 0xEA0EA0EA}, {0xE38E38E4, 0}, + {0xDD67C8A6, 0xDD67C8A6}, {0xD79435E5, 0xD79435E5}, {0xD20D20D2, 0xD20D20D2}, + {0xCCCCCCCD, 0}, {0xC7CE0C7D, 0}, {0xC30C30C3, 0xC30C30C3}, + {0xBE82FA0C, 0}, {0xBA2E8BA3, 0}, {0xB60B60B6, 0xB60B60B6}, + {0xB21642C9, 0}, {0xAE4C415D, 0}, {0xAAAAAAAB, 0}, + {0xA72F053A, 0}, {0xA3D70A3E, 0}, {0xA0A0A0A1, 0}, + {0x9D89D89E, 0}, {0x9A90E7D9, 0x9A90E7D9}, {0x97B425ED, 0x97B425ED}, + {0x94F2094F, 0x94F2094F}, {0x92492492, 0x92492492}, {0x8FB823EE, 0x8FB823EE}, + {0x8D3DCB09, 0}, {0x8AD8F2FC, 0}, {0x88888889, 0}, + {0x864B8A7E, 0}, {0x84210842, 0x84210842}, {0x82082082, 0x82082082}, + {0xFFFFFFFF, 0xFFFFFFFF}, {0xFC0FC0FD, 0}, {0xF83E0F84, 0}, + {0xF4898D60, 0}, {0xF0F0F0F1, 0}, {0xED7303B6, 0}, + {0xEA0EA0EA, 0xEA0EA0EA}, {0xE6C2B449, 0}, {0xE38E38E4, 0}, + {0xE070381C, 0xE070381C}, {0xDD67C8A6, 0xDD67C8A6}, {0xDA740DA8, 0}, + {0xD79435E5, 0xD79435E5}, {0xD4C77B04, 0}, {0xD20D20D2, 0xD20D20D2}, + {0xCF6474A9, 0}, {0xCCCCCCCD, 0}, {0xCA4587E7, 0}, + {0xC7CE0C7D, 0}, {0xC565C87C, 0}, {0xC30C30C3, 0xC30C30C3}, + {0xC0C0C0C1, 0}, 
{0xBE82FA0C, 0}, {0xBC52640C, 0}, + {0xBA2E8BA3, 0}, {0xB81702E1, 0}, {0xB60B60B6, 0xB60B60B6}, + {0xB40B40B4, 0xB40B40B4}, {0xB21642C9, 0}, {0xB02C0B03, 0}, + {0xAE4C415D, 0}, {0xAC769184, 0xAC769184}, {0xAAAAAAAB, 0}, + {0xA8E83F57, 0xA8E83F57}, {0xA72F053A, 0}, {0xA57EB503, 0}, + {0xA3D70A3E, 0}, {0xA237C32B, 0xA237C32B}, {0xA0A0A0A1, 0}, + {0x9F1165E7, 0x9F1165E7}, {0x9D89D89E, 0}, {0x9C09C09C, 0x9C09C09C}, + {0x9A90E7D9, 0x9A90E7D9}, {0x991F1A51, 0x991F1A51}, {0x97B425ED, 0x97B425ED}, + {0x964FDA6C, 0x964FDA6C}, {0x94F2094F, 0x94F2094F}, {0x939A85C4, 0x939A85C4}, + {0x92492492, 0x92492492}, {0x90FDBC09, 0x90FDBC09}, {0x8FB823EE, 0x8FB823EE}, + {0x8E78356D, 0x8E78356D}, {0x8D3DCB09, 0}, {0x8C08C08C, 0x8C08C08C}, + {0x8AD8F2FC, 0}, {0x89AE408A, 0}, {0x88888889, 0}, + {0x8767AB5F, 0x8767AB5F}, {0x864B8A7E, 0}, {0x85340853, 0x85340853}, + {0x84210842, 0x84210842}, {0x83126E98, 0}, {0x82082082, 0x82082082}, + {0x81020408, 0x81020408}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xFE03F810, 0}, + {0xFC0FC0FD, 0}, {0xFA232CF3, 0}, {0xF83E0F84, 0}, + {0xF6603D99, 0}, {0xF4898D60, 0}, {0xF2B9D649, 0}, + {0xF0F0F0F1, 0}, {0xEF2EB720, 0}, {0xED7303B6, 0}, + {0xEBBDB2A6, 0}, {0xEA0EA0EA, 0xEA0EA0EA}, {0xE865AC7C, 0}, + {0xE6C2B449, 0}, {0xE525982B, 0}, {0xE38E38E4, 0}, + {0xE1FC780F, 0}, {0xE070381C, 0xE070381C}, {0xDEE95C4D, 0}, + {0xDD67C8A6, 0xDD67C8A6}, {0xDBEB61EF, 0}, {0xDA740DA8, 0}, + {0xD901B204, 0}, {0xD79435E5, 0xD79435E5}, {0xD62B80D7, 0}, + {0xD4C77B04, 0}, {0xD3680D37, 0}, {0xD20D20D2, 0xD20D20D2}, + {0xD0B69FCC, 0}, {0xCF6474A9, 0}, {0xCE168A77, 0xCE168A77}, + {0xCCCCCCCD, 0}, {0xCB8727C1, 0}, {0xCA4587E7, 0}, + {0xC907DA4F, 0}, {0xC7CE0C7D, 0}, {0xC6980C6A, 0}, + {0xC565C87C, 0}, {0xC4372F86, 0}, {0xC30C30C3, 0xC30C30C3}, + {0xC1E4BBD6, 0}, {0xC0C0C0C1, 0}, {0xBFA02FE8, 0xBFA02FE8}, + {0xBE82FA0C, 0}, {0xBD691047, 0xBD691047}, {0xBC52640C, 0}, + {0xBB3EE722, 0}, {0xBA2E8BA3, 0}, {0xB92143FA, 0xB92143FA}, + {0xB81702E1, 0}, {0xB70FBB5A, 0xB70FBB5A}, {0xB60B60B6, 
0xB60B60B6}, + {0xB509E68B, 0}, {0xB40B40B4, 0xB40B40B4}, {0xB30F6353, 0}, + {0xB21642C9, 0}, {0xB11FD3B8, 0xB11FD3B8}, {0xB02C0B03, 0}, + {0xAF3ADDC7, 0}, {0xAE4C415D, 0}, {0xAD602B58, 0xAD602B58}, + {0xAC769184, 0xAC769184}, {0xAB8F69E3, 0}, {0xAAAAAAAB, 0}, + {0xA9C84A48, 0}, {0xA8E83F57, 0xA8E83F57}, {0xA80A80A8, 0xA80A80A8}, + {0xA72F053A, 0}, {0xA655C439, 0xA655C439}, {0xA57EB503, 0}, + {0xA4A9CF1E, 0}, {0xA3D70A3E, 0}, {0xA3065E40, 0}, + {0xA237C32B, 0xA237C32B}, {0xA16B312F, 0}, {0xA0A0A0A1, 0}, + {0x9FD809FE, 0}, {0x9F1165E7, 0x9F1165E7}, {0x9E4CAD24, 0}, + {0x9D89D89E, 0}, {0x9CC8E161, 0}, {0x9C09C09C, 0x9C09C09C}, + {0x9B4C6F9F, 0}, {0x9A90E7D9, 0x9A90E7D9}, {0x99D722DB, 0}, + {0x991F1A51, 0x991F1A51}, {0x9868C80A, 0}, {0x97B425ED, 0x97B425ED}, + {0x97012E02, 0x97012E02}, {0x964FDA6C, 0x964FDA6C}, {0x95A02568, 0x95A02568}, + {0x94F2094F, 0x94F2094F}, {0x94458094, 0x94458094}, {0x939A85C4, 0x939A85C4}, + {0x92F11384, 0x92F11384}, {0x92492492, 0x92492492}, {0x91A2B3C5, 0}, + {0x90FDBC09, 0x90FDBC09}, {0x905A3863, 0x905A3863}, {0x8FB823EE, 0x8FB823EE}, + {0x8F1779DA, 0}, {0x8E78356D, 0x8E78356D}, {0x8DDA5202, 0x8DDA5202}, + {0x8D3DCB09, 0}, {0x8CA29C04, 0x8CA29C04}, {0x8C08C08C, 0x8C08C08C}, + {0x8B70344A, 0x8B70344A}, {0x8AD8F2FC, 0}, {0x8A42F870, 0x8A42F870}, + {0x89AE408A, 0}, {0x891AC73B, 0}, {0x88888889, 0}, + {0x87F78088, 0}, {0x8767AB5F, 0x8767AB5F}, {0x86D90545, 0}, + {0x864B8A7E, 0}, {0x85BF3761, 0x85BF3761}, {0x85340853, 0x85340853}, + {0x84A9F9C8, 0x84A9F9C8}, {0x84210842, 0x84210842}, {0x83993052, 0x83993052}, + {0x83126E98, 0}, {0x828CBFBF, 0}, {0x82082082, 0x82082082}, + {0x81848DA9, 0}, {0x81020408, 0x81020408}, {0x80808081, 0}, + {0xFFFFFFFF, 0xFFFFFFFF}, {0xFF00FF01, 0}, {0xFE03F810, 0}, + {0xFD08E551, 0}, {0xFC0FC0FD, 0}, {0xFB188566, 0}, + {0xFA232CF3, 0}, {0xF92FB222, 0}, {0xF83E0F84, 0}, + {0xF74E3FC3, 0}, {0xF6603D99, 0}, {0xF57403D6, 0}, + {0xF4898D60, 0}, {0xF3A0D52D, 0}, {0xF2B9D649, 0}, + {0xF1D48BCF, 0}, {0xF0F0F0F1, 0}, 
{0xF00F00F0, 0xF00F00F0}, + {0xEF2EB720, 0}, {0xEE500EE5, 0xEE500EE5}, {0xED7303B6, 0}, + {0xEC979119, 0}, {0xEBBDB2A6, 0}, {0xEAE56404, 0}, + {0xEA0EA0EA, 0xEA0EA0EA}, {0xE9396520, 0}, {0xE865AC7C, 0}, + {0xE79372E3, 0}, {0xE6C2B449, 0}, {0xE5F36CB0, 0xE5F36CB0}, + {0xE525982B, 0}, {0xE45932D8, 0}, {0xE38E38E4, 0}, + {0xE2C4A689, 0}, {0xE1FC780F, 0}, {0xE135A9CA, 0}, + {0xE070381C, 0xE070381C}, {0xDFAC1F75, 0}, {0xDEE95C4D, 0}, + {0xDE27EB2D, 0}, {0xDD67C8A6, 0xDD67C8A6}, {0xDCA8F159, 0}, + {0xDBEB61EF, 0}, {0xDB2F171E, 0}, {0xDA740DA8, 0}, + {0xD9BA4257, 0}, {0xD901B204, 0}, {0xD84A598F, 0}, + {0xD79435E5, 0xD79435E5}, {0xD6DF43FD, 0}, {0xD62B80D7, 0}, + {0xD578E97D, 0}, {0xD4C77B04, 0}, {0xD417328A, 0}, + {0xD3680D37, 0}, {0xD2BA083C, 0}, {0xD20D20D2, 0xD20D20D2}, + {0xD161543E, 0xD161543E}, {0xD0B69FCC, 0}, {0xD00D00D0, 0xD00D00D0}, + {0xCF6474A9, 0}, {0xCEBCF8BC, 0}, {0xCE168A77, 0xCE168A77}, + {0xCD712753, 0}, {0xCCCCCCCD, 0}, {0xCC29786D, 0}, + {0xCB8727C1, 0}, {0xCAE5D85F, 0xCAE5D85F}, {0xCA4587E7, 0}, + {0xC9A633FD, 0}, {0xC907DA4F, 0}, {0xC86A7890, 0xC86A7890}, + {0xC7CE0C7D, 0}, {0xC73293D8, 0}, {0xC6980C6A, 0}, + {0xC5FE7403, 0xC5FE7403}, {0xC565C87C, 0}, {0xC4CE07B0, 0xC4CE07B0}, + {0xC4372F86, 0}, {0xC3A13DE6, 0xC3A13DE6}, {0xC30C30C3, 0xC30C30C3}, + {0xC2780614, 0}, {0xC1E4BBD6, 0}, {0xC152500C, 0xC152500C}, + {0xC0C0C0C1, 0}, {0xC0300C03, 0xC0300C03}, {0xBFA02FE8, 0xBFA02FE8}, + {0xBF112A8B, 0}, {0xBE82FA0C, 0}, {0xBDF59C92, 0}, + {0xBD691047, 0xBD691047}, {0xBCDD535E, 0}, {0xBC52640C, 0}, + {0xBBC8408D, 0}, {0xBB3EE722, 0}, {0xBAB65610, 0xBAB65610}, + {0xBA2E8BA3, 0}, {0xB9A7862A, 0xB9A7862A}, {0xB92143FA, 0xB92143FA}, + {0xB89BC36D, 0}, {0xB81702E1, 0}, {0xB79300B8, 0}, + {0xB70FBB5A, 0xB70FBB5A}, {0xB68D3134, 0xB68D3134}, {0xB60B60B6, 0xB60B60B6}, + {0xB58A4855, 0xB58A4855}, {0xB509E68B, 0}, {0xB48A39D4, 0xB48A39D4}, + {0xB40B40B4, 0xB40B40B4}, {0xB38CF9B0, 0xB38CF9B0}, {0xB30F6353, 0}, + {0xB2927C2A, 0}, {0xB21642C9, 0}, {0xB19AB5C5, 0}, + 
{0xB11FD3B8, 0xB11FD3B8}, {0xB0A59B42, 0}, {0xB02C0B03, 0}, + {0xAFB321A1, 0xAFB321A1}, {0xAF3ADDC7, 0}, {0xAEC33E20, 0}, + {0xAE4C415D, 0}, {0xADD5E632, 0xADD5E632}, {0xAD602B58, 0xAD602B58}, + {0xACEB0F89, 0xACEB0F89}, {0xAC769184, 0xAC769184}, {0xAC02B00B, 0}, + {0xAB8F69E3, 0}, {0xAB1CBDD4, 0}, {0xAAAAAAAB, 0}, + {0xAA392F36, 0}, {0xA9C84A48, 0}, {0xA957FAB5, 0xA957FAB5}, + {0xA8E83F57, 0xA8E83F57}, {0xA8791709, 0}, {0xA80A80A8, 0xA80A80A8}, + {0xA79C7B17, 0}, {0xA72F053A, 0}, {0xA6C21DF7, 0}, + {0xA655C439, 0xA655C439}, {0xA5E9F6ED, 0xA5E9F6ED}, {0xA57EB503, 0}, + {0xA513FD6C, 0}, {0xA4A9CF1E, 0}, {0xA4402910, 0xA4402910}, + {0xA3D70A3E, 0}, {0xA36E71A3, 0}, {0xA3065E40, 0}, + {0xA29ECF16, 0xA29ECF16}, {0xA237C32B, 0xA237C32B}, {0xA1D13986, 0}, + {0xA16B312F, 0}, {0xA105A933, 0}, {0xA0A0A0A1, 0}, + {0xA03C1689, 0}, {0x9FD809FE, 0}, {0x9F747A15, 0x9F747A15}, + {0x9F1165E7, 0x9F1165E7}, {0x9EAECC8D, 0x9EAECC8D}, {0x9E4CAD24, 0}, + {0x9DEB06C9, 0x9DEB06C9}, {0x9D89D89E, 0}, {0x9D2921C4, 0}, + {0x9CC8E161, 0}, {0x9C69169B, 0x9C69169B}, {0x9C09C09C, 0x9C09C09C}, + {0x9BAADE8E, 0x9BAADE8E}, {0x9B4C6F9F, 0}, {0x9AEE72FD, 0}, + {0x9A90E7D9, 0x9A90E7D9}, {0x9A33CD67, 0x9A33CD67}, {0x99D722DB, 0}, + {0x997AE76B, 0x997AE76B}, {0x991F1A51, 0x991F1A51}, {0x98C3BAC7, 0x98C3BAC7}, + {0x9868C80A, 0}, {0x980E4156, 0x980E4156}, {0x97B425ED, 0x97B425ED}, + {0x975A7510, 0}, {0x97012E02, 0x97012E02}, {0x96A8500A, 0}, + {0x964FDA6C, 0x964FDA6C}, {0x95F7CC73, 0}, {0x95A02568, 0x95A02568}, + {0x9548E498, 0}, {0x94F2094F, 0x94F2094F}, {0x949B92DE, 0}, + {0x94458094, 0x94458094}, {0x93EFD1C5, 0x93EFD1C5}, {0x939A85C4, 0x939A85C4}, + {0x93459BE7, 0}, {0x92F11384, 0x92F11384}, {0x929CEBF5, 0}, + {0x92492492, 0x92492492}, {0x91F5BCB9, 0}, {0x91A2B3C5, 0}, + {0x91500915, 0x91500915}, {0x90FDBC09, 0x90FDBC09}, {0x90ABCC02, 0x90ABCC02}, + {0x905A3863, 0x905A3863}, {0x90090090, 0x90090090}, {0x8FB823EE, 0x8FB823EE}, + {0x8F67A1E4, 0}, {0x8F1779DA, 0}, {0x8EC7AB3A, 0}, + {0x8E78356D, 
0x8E78356D}, {0x8E2917E1, 0}, {0x8DDA5202, 0x8DDA5202}, + {0x8D8BE340, 0}, {0x8D3DCB09, 0}, {0x8CF008CF, 0x8CF008CF}, + {0x8CA29C04, 0x8CA29C04}, {0x8C55841D, 0}, {0x8C08C08C, 0x8C08C08C}, + {0x8BBC50C9, 0}, {0x8B70344A, 0x8B70344A}, {0x8B246A88, 0}, + {0x8AD8F2FC, 0}, {0x8A8DCD20, 0}, {0x8A42F870, 0x8A42F870}, + {0x89F8746A, 0}, {0x89AE408A, 0}, {0x89645C4F, 0x89645C4F}, + {0x891AC73B, 0}, {0x88D180CD, 0x88D180CD}, {0x88888889, 0}, + {0x883FDDF0, 0x883FDDF0}, {0x87F78088, 0}, {0x87AF6FD6, 0}, + {0x8767AB5F, 0x8767AB5F}, {0x872032AC, 0x872032AC}, {0x86D90545, 0}, + {0x869222B2, 0}, {0x864B8A7E, 0}, {0x86053C34, 0x86053C34}, + {0x85BF3761, 0x85BF3761}, {0x85797B91, 0x85797B91}, {0x85340853, 0x85340853}, + {0x84EEDD36, 0}, {0x84A9F9C8, 0x84A9F9C8}, {0x84655D9C, 0}, + {0x84210842, 0x84210842}, {0x83DCF94E, 0}, {0x83993052, 0x83993052}, + {0x8355ACE4, 0}, {0x83126E98, 0}, {0x82CF7504, 0}, + {0x828CBFBF, 0}, {0x824A4E61, 0}, {0x82082082, 0x82082082}, + {0x81C635BC, 0x81C635BC}, {0x81848DA9, 0}, {0x814327E4, 0}, + {0x81020408, 0x81020408}, {0x80C121B3, 0}, {0x80808081, 0}, + {0x80402010, 0x80402010}, {0xFFFFFFFF, 0xFFFFFFFF}, {0xFF803FE1, 0}, + {0xFF00FF01, 0}, {0xFE823CA6, 0}, {0xFE03F810, 0}, + {0xFD863087, 0}, {0xFD08E551, 0}, {0xFC8C15B5, 0}, + {0xFC0FC0FD, 0}, {0xFB93E673, 0}, {0xFB188566, 0}, + {0xFA9D9D20, 0}, {0xFA232CF3, 0}, {0xF9A9342D, 0}, + {0xF92FB222, 0}, {0xF8B6A622, 0xF8B6A622}, {0xF83E0F84, 0}, + {0xF7C5ED9D, 0}, {0xF74E3FC3, 0}, {0xF6D7054E, 0}, + {0xF6603D99, 0}, {0xF5E9E7FD, 0}, {0xF57403D6, 0}, + {0xF4FE9083, 0}, {0xF4898D60, 0}, {0xF414F9CE, 0}, + {0xF3A0D52D, 0}, {0xF32D1EE0, 0}, {0xF2B9D649, 0}, + {0xF246FACC, 0}, {0xF1D48BCF, 0}, {0xF16288B9, 0}, + {0xF0F0F0F1, 0}, {0xF07FC3E0, 0xF07FC3E0}, {0xF00F00F0, 0xF00F00F0}, + {0xEF9EA78C, 0}, {0xEF2EB720, 0}, {0xEEBF2F19, 0}, + {0xEE500EE5, 0xEE500EE5}, {0xEDE155F4, 0}, {0xED7303B6, 0}, + {0xED05179C, 0xED05179C}, {0xEC979119, 0}, {0xEC2A6FA0, 0xEC2A6FA0}, + {0xEBBDB2A6, 0}, {0xEB5159A0, 0}, {0xEAE56404, 
0}, + {0xEA79D14A, 0}, {0xEA0EA0EA, 0xEA0EA0EA}, {0xE9A3D25E, 0xE9A3D25E}, + {0xE9396520, 0}, {0xE8CF58AB, 0}, {0xE865AC7C, 0}, + {0xE7FC600F, 0}, {0xE79372E3, 0}, {0xE72AE476, 0}, + {0xE6C2B449, 0}, {0xE65AE1DC, 0}, {0xE5F36CB0, 0xE5F36CB0}, + {0xE58C544A, 0}, {0xE525982B, 0}, {0xE4BF37D9, 0}, + {0xE45932D8, 0}, {0xE3F388AF, 0}, {0xE38E38E4, 0}, + {0xE32942FF, 0}, {0xE2C4A689, 0}, {0xE260630B, 0}, + {0xE1FC780F, 0}, {0xE198E520, 0}, {0xE135A9CA, 0}, + {0xE0D2C59A, 0}, {0xE070381C, 0xE070381C}, {0xE00E00E0, 0xE00E00E0}, + {0xDFAC1F75, 0}, {0xDF4A9369, 0}, {0xDEE95C4D, 0}, + {0xDE8879B3, 0}, {0xDE27EB2D, 0}, {0xDDC7B04D, 0}, + {0xDD67C8A6, 0xDD67C8A6}, {0xDD0833CE, 0}, {0xDCA8F159, 0}, + {0xDC4A00DD, 0}, {0xDBEB61EF, 0}, {0xDB8D1428, 0}, + {0xDB2F171E, 0}, {0xDAD16A6B, 0}, {0xDA740DA8, 0}, + {0xDA17006D, 0xDA17006D}, {0xD9BA4257, 0}, {0xD95DD300, 0}, + {0xD901B204, 0}, {0xD8A5DEFF, 0}, {0xD84A598F, 0}, + {0xD7EF2152, 0}, {0xD79435E5, 0xD79435E5}, {0xD73996E9, 0}, + {0xD6DF43FD, 0}, {0xD6853CC1, 0}, {0xD62B80D7, 0}, + {0xD5D20FDF, 0}, {0xD578E97D, 0}, {0xD5200D52, 0xD5200D52}, + {0xD4C77B04, 0}, {0xD46F3235, 0}, {0xD417328A, 0}, + {0xD3BF7BA9, 0}, {0xD3680D37, 0}, {0xD310E6DB, 0}, + {0xD2BA083C, 0}, {0xD2637101, 0}, {0xD20D20D2, 0xD20D20D2}, + {0xD1B71759, 0}, {0xD161543E, 0xD161543E}, {0xD10BD72C, 0}, + {0xD0B69FCC, 0}, {0xD061ADCA, 0}, {0xD00D00D0, 0xD00D00D0}, + {0xCFB8988C, 0}, {0xCF6474A9, 0}, {0xCF1094D4, 0}, + {0xCEBCF8BC, 0}, {0xCE69A00D, 0}, {0xCE168A77, 0xCE168A77}, + {0xCDC3B7A9, 0xCDC3B7A9}, {0xCD712753, 0}, {0xCD1ED924, 0}, + {0xCCCCCCCD, 0}, {0xCC7B0200, 0}, {0xCC29786D, 0}, + {0xCBD82FC7, 0}, {0xCB8727C1, 0}, {0xCB36600D, 0}, + {0xCAE5D85F, 0xCAE5D85F}, {0xCA95906C, 0}, {0xCA4587E7, 0}, + {0xC9F5BE86, 0}, {0xC9A633FD, 0}, {0xC956E803, 0xC956E803}, + {0xC907DA4F, 0}, {0xC8B90A96, 0}, {0xC86A7890, 0xC86A7890}, + {0xC81C23F5, 0xC81C23F5}, {0xC7CE0C7D, 0}, {0xC78031E0, 0xC78031E0}, + {0xC73293D8, 0}, {0xC6E5321D, 0}, {0xC6980C6A, 0}, + {0xC64B2278, 
0xC64B2278}, {0xC5FE7403, 0xC5FE7403}, {0xC5B200C6, 0}, + {0xC565C87C, 0}, {0xC519CAE0, 0xC519CAE0}, {0xC4CE07B0, 0xC4CE07B0}, + {0xC4827EA8, 0xC4827EA8}, {0xC4372F86, 0}, {0xC3EC1A06, 0}, + {0xC3A13DE6, 0xC3A13DE6}, {0xC3569AE6, 0}, {0xC30C30C3, 0xC30C30C3}, + {0xC2C1FF3E, 0}, {0xC2780614, 0}, {0xC22E4507, 0}, + {0xC1E4BBD6, 0}, {0xC19B6A42, 0}, {0xC152500C, 0xC152500C}, + {0xC1096CF6, 0}, {0xC0C0C0C1, 0}, {0xC0784B2F, 0}, + {0xC0300C03, 0xC0300C03}, {0xBFE80300, 0}, {0xBFA02FE8, 0xBFA02FE8}, + {0xBF589280, 0}, {0xBF112A8B, 0}, {0xBEC9F7CE, 0}, + {0xBE82FA0C, 0}, {0xBE3C310C, 0}, {0xBDF59C92, 0}, + {0xBDAF3C64, 0}, {0xBD691047, 0xBD691047}, {0xBD231803, 0}, + {0xBCDD535E, 0}, {0xBC97C21E, 0xBC97C21E}, {0xBC52640C, 0}, + {0xBC0D38EE, 0xBC0D38EE}, {0xBBC8408D, 0}, {0xBB837AB1, 0}, + {0xBB3EE722, 0}, {0xBAFA85A9, 0xBAFA85A9}, {0xBAB65610, 0xBAB65610}, + {0xBA725820, 0xBA725820}, {0xBA2E8BA3, 0}, {0xB9EAF063, 0}, + {0xB9A7862A, 0xB9A7862A}, {0xB9644CC4, 0}, {0xB92143FA, 0xB92143FA}, + {0xB8DE6B9A, 0}, {0xB89BC36D, 0}, {0xB8594B41, 0}, + {0xB81702E1, 0}, {0xB7D4EA19, 0xB7D4EA19}, {0xB79300B8, 0}, + {0xB7514689, 0}, {0xB70FBB5A, 0xB70FBB5A}, {0xB6CE5EF9, 0xB6CE5EF9}, + {0xB68D3134, 0xB68D3134}, {0xB64C31D9, 0}, {0xB60B60B6, 0xB60B60B6}, + {0xB5CABD9B, 0}, {0xB58A4855, 0xB58A4855}, {0xB54A00B5, 0xB54A00B5}, + {0xB509E68B, 0}, {0xB4C9F9A5, 0}, {0xB48A39D4, 0xB48A39D4}, + {0xB44AA6E9, 0xB44AA6E9}, {0xB40B40B4, 0xB40B40B4}, {0xB3CC0706, 0}, + {0xB38CF9B0, 0xB38CF9B0}, {0xB34E1884, 0}, {0xB30F6353, 0}, + {0xB2D0D9EF, 0}, {0xB2927C2A, 0}, {0xB25449D7, 0}, + {0xB21642C9, 0}, {0xB1D866D1, 0xB1D866D1}, {0xB19AB5C5, 0}, + {0xB15D2F76, 0}, {0xB11FD3B8, 0xB11FD3B8}, {0xB0E2A260, 0xB0E2A260}, + {0xB0A59B42, 0}, {0xB068BE31, 0}, {0xB02C0B03, 0}, + {0xAFEF818C, 0}, {0xAFB321A1, 0xAFB321A1}, {0xAF76EB19, 0}, + {0xAF3ADDC7, 0}, {0xAEFEF982, 0}, {0xAEC33E20, 0}, + {0xAE87AB76, 0xAE87AB76}, {0xAE4C415D, 0}, {0xAE10FFA9, 0}, + {0xADD5E632, 0xADD5E632}, {0xAD9AF4D0, 0}, {0xAD602B58, 
0xAD602B58}, + {0xAD2589A4, 0}, {0xACEB0F89, 0xACEB0F89}, {0xACB0BCE1, 0xACB0BCE1}, + {0xAC769184, 0xAC769184}, {0xAC3C8D4A, 0}, {0xAC02B00B, 0}, + {0xABC8F9A0, 0xABC8F9A0}, {0xAB8F69E3, 0}, {0xAB5600AC, 0}, + {0xAB1CBDD4, 0}, {0xAAE3A136, 0}, {0xAAAAAAAB, 0}, + {0xAA71DA0D, 0}, {0xAA392F36, 0}, {0xAA00AA01, 0}, + {0xA9C84A48, 0}, {0xA9900FE6, 0}, {0xA957FAB5, 0xA957FAB5}, + {0xA9200A92, 0xA9200A92}, {0xA8E83F57, 0xA8E83F57}, {0xA8B098E0, 0xA8B098E0}, + {0xA8791709, 0}, {0xA841B9AD, 0}, {0xA80A80A8, 0xA80A80A8}, + {0xA7D36BD8, 0}, {0xA79C7B17, 0}, {0xA765AE44, 0}, + {0xA72F053A, 0}, {0xA6F87FD6, 0xA6F87FD6}, {0xA6C21DF7, 0}, + {0xA68BDF79, 0}, {0xA655C439, 0xA655C439}, {0xA61FCC16, 0xA61FCC16}, + {0xA5E9F6ED, 0xA5E9F6ED}, {0xA5B4449D, 0}, {0xA57EB503, 0}, + {0xA54947FE, 0}, {0xA513FD6C, 0}, {0xA4DED52C, 0xA4DED52C}, + {0xA4A9CF1E, 0}, {0xA474EB1F, 0xA474EB1F}, {0xA4402910, 0xA4402910}, + {0xA40B88D0, 0}, {0xA3D70A3E, 0}, {0xA3A2AD39, 0xA3A2AD39}, + {0xA36E71A3, 0}, {0xA33A575A, 0xA33A575A}, {0xA3065E40, 0}, + {0xA2D28634, 0}, {0xA29ECF16, 0xA29ECF16}, {0xA26B38C9, 0}, + {0xA237C32B, 0xA237C32B}, {0xA2046E1F, 0xA2046E1F}, {0xA1D13986, 0}, + {0xA19E2540, 0}, {0xA16B312F, 0}, {0xA1385D35, 0}, + {0xA105A933, 0}, {0xA0D3150C, 0}, {0xA0A0A0A1, 0}, + {0xA06E4BD4, 0xA06E4BD4}, {0xA03C1689, 0}, {0xA00A00A0, 0xA00A00A0}, + {0x9FD809FE, 0}, {0x9FA63284, 0}, {0x9F747A15, 0x9F747A15}, + {0x9F42E095, 0x9F42E095}, {0x9F1165E7, 0x9F1165E7}, {0x9EE009EE, 0x9EE009EE}, + {0x9EAECC8D, 0x9EAECC8D}, {0x9E7DADA9, 0}, {0x9E4CAD24, 0}, + {0x9E1BCAE3, 0}, {0x9DEB06C9, 0x9DEB06C9}, {0x9DBA60BB, 0x9DBA60BB}, + {0x9D89D89E, 0}, {0x9D596E54, 0x9D596E54}, {0x9D2921C4, 0}, + {0x9CF8F2D1, 0x9CF8F2D1}, {0x9CC8E161, 0}, {0x9C98ED58, 0}, + {0x9C69169B, 0x9C69169B}, {0x9C395D10, 0x9C395D10}, {0x9C09C09C, 0x9C09C09C}, + {0x9BDA4124, 0x9BDA4124}, {0x9BAADE8E, 0x9BAADE8E}, {0x9B7B98C0, 0}, + {0x9B4C6F9F, 0}, {0x9B1D6311, 0x9B1D6311}, {0x9AEE72FD, 0}, + {0x9ABF9F48, 0x9ABF9F48}, {0x9A90E7D9, 0x9A90E7D9}, 
{0x9A624C97, 0}, + {0x9A33CD67, 0x9A33CD67}, {0x9A056A31, 0}, {0x99D722DB, 0}, + {0x99A8F74C, 0}, {0x997AE76B, 0x997AE76B}, {0x994CF320, 0x994CF320}, + {0x991F1A51, 0x991F1A51}, {0x98F15CE7, 0}, {0x98C3BAC7, 0x98C3BAC7}, + {0x989633DB, 0x989633DB}, {0x9868C80A, 0}, {0x983B773B, 0}, + {0x980E4156, 0x980E4156}, {0x97E12644, 0x97E12644}, {0x97B425ED, 0x97B425ED}, + {0x97874039, 0}, {0x975A7510, 0}, {0x972DC45B, 0}, + {0x97012E02, 0x97012E02}, {0x96D4B1EF, 0}, {0x96A8500A, 0}, + {0x967C083B, 0}, {0x964FDA6C, 0x964FDA6C}, {0x9623C686, 0x9623C686}, + {0x95F7CC73, 0}, {0x95CBEC1B, 0}, {0x95A02568, 0x95A02568}, + {0x95747844, 0}, {0x9548E498, 0}, {0x951D6A4E, 0}, + {0x94F2094F, 0x94F2094F}, {0x94C6C187, 0}, {0x949B92DE, 0}, + {0x94707D3F, 0}, {0x94458094, 0x94458094}, {0x941A9CC8, 0x941A9CC8}, + {0x93EFD1C5, 0x93EFD1C5}, {0x93C51F76, 0}, {0x939A85C4, 0x939A85C4}, + {0x9370049C, 0}, {0x93459BE7, 0}, {0x931B4B91, 0}, + {0x92F11384, 0x92F11384}, {0x92C6F3AC, 0x92C6F3AC}, {0x929CEBF5, 0}, + {0x9272FC48, 0x9272FC48}, {0x92492492, 0x92492492}, {0x921F64BF, 0}, + {0x91F5BCB9, 0}, {0x91CC2C6C, 0x91CC2C6C}, {0x91A2B3C5, 0}, + {0x917952AF, 0}, {0x91500915, 0x91500915}, {0x9126D6E5, 0}, + {0x90FDBC09, 0x90FDBC09}, {0x90D4B86F, 0}, {0x90ABCC02, 0x90ABCC02}, + {0x9082F6B0, 0}, {0x905A3863, 0x905A3863}, {0x9031910A, 0}, + {0x90090090, 0x90090090}, {0x8FE086E3, 0}, {0x8FB823EE, 0x8FB823EE}, + {0x8F8FD7A0, 0}, {0x8F67A1E4, 0}, {0x8F3F82A8, 0x8F3F82A8}, + {0x8F1779DA, 0}, {0x8EEF8766, 0}, {0x8EC7AB3A, 0}, + {0x8E9FE542, 0x8E9FE542}, {0x8E78356D, 0x8E78356D}, {0x8E509BA8, 0x8E509BA8}, + {0x8E2917E1, 0}, {0x8E01AA05, 0}, {0x8DDA5202, 0x8DDA5202}, + {0x8DB30FC6, 0x8DB30FC6}, {0x8D8BE340, 0}, {0x8D64CC5C, 0}, + {0x8D3DCB09, 0}, {0x8D16DF35, 0x8D16DF35}, {0x8CF008CF, 0x8CF008CF}, + {0x8CC947C5, 0}, {0x8CA29C04, 0x8CA29C04}, {0x8C7C057D, 0}, + {0x8C55841D, 0}, {0x8C2F17D2, 0x8C2F17D2}, {0x8C08C08C, 0x8C08C08C}, + {0x8BE27E39, 0x8BE27E39}, {0x8BBC50C9, 0}, {0x8B963829, 0x8B963829}, + {0x8B70344A, 
0x8B70344A}, {0x8B4A451A, 0}, {0x8B246A88, 0}, + {0x8AFEA483, 0x8AFEA483}, {0x8AD8F2FC, 0}, {0x8AB355E0, 0x8AB355E0}, + {0x8A8DCD20, 0}, {0x8A6858AB, 0}, {0x8A42F870, 0x8A42F870}, + {0x8A1DAC60, 0x8A1DAC60}, {0x89F8746A, 0}, {0x89D3507D, 0}, + {0x89AE408A, 0}, {0x89894480, 0}, {0x89645C4F, 0x89645C4F}, + {0x893F87E8, 0x893F87E8}, {0x891AC73B, 0}, {0x88F61A37, 0x88F61A37}, + {0x88D180CD, 0x88D180CD}, {0x88ACFAEE, 0}, {0x88888889, 0}, + {0x8864298F, 0}, {0x883FDDF0, 0x883FDDF0}, {0x881BA59E, 0}, + {0x87F78088, 0}, {0x87D36EA0, 0}, {0x87AF6FD6, 0}, + {0x878B841B, 0}, {0x8767AB5F, 0x8767AB5F}, {0x8743E595, 0}, + {0x872032AC, 0x872032AC}, {0x86FC9296, 0x86FC9296}, {0x86D90545, 0}, + {0x86B58AA8, 0}, {0x869222B2, 0}, {0x866ECD53, 0x866ECD53}, + {0x864B8A7E, 0}, {0x86285A23, 0x86285A23}, {0x86053C34, 0x86053C34}, + {0x85E230A3, 0x85E230A3}, {0x85BF3761, 0x85BF3761}, {0x859C5060, 0x859C5060}, + {0x85797B91, 0x85797B91}, {0x8556B8E7, 0x8556B8E7}, {0x85340853, 0x85340853}, + {0x851169C7, 0x851169C7}, {0x84EEDD36, 0}, {0x84CC6290, 0}, + {0x84A9F9C8, 0x84A9F9C8}, {0x8487A2D1, 0}, {0x84655D9C, 0}, + {0x84432A1B, 0x84432A1B}, {0x84210842, 0x84210842}, {0x83FEF802, 0x83FEF802}, + {0x83DCF94E, 0}, {0x83BB0C18, 0}, {0x83993052, 0x83993052}, + {0x837765F0, 0x837765F0}, {0x8355ACE4, 0}, {0x83340520, 0x83340520}, + {0x83126E98, 0}, {0x82F0E93D, 0x82F0E93D}, {0x82CF7504, 0}, + {0x82AE11DE, 0}, {0x828CBFBF, 0}, {0x826B7E99, 0x826B7E99}, + {0x824A4E61, 0}, {0x82292F08, 0}, {0x82082082, 0x82082082}, + {0x81E722C2, 0x81E722C2}, {0x81C635BC, 0x81C635BC}, {0x81A55963, 0}, + {0x81848DA9, 0}, {0x8163D283, 0}, {0x814327E4, 0}, + {0x81228DBF, 0}, {0x81020408, 0x81020408}, {0x80E18AB3, 0}, + {0x80C121B3, 0}, {0x80A0C8FB, 0x80A0C8FB}, {0x80808081, 0}, + {0x80604836, 0x80604836}, {0x80402010, 0x80402010}, {0x80200802, 0x80200802}, + {0xFFFFFFFF, 0xFFFFFFFF} +}; diff --git a/vp10/common/odintrin.h b/vp10/common/odintrin.h new file mode 100644 index 000000000..c96f8a742 --- /dev/null +++ 
b/vp10/common/odintrin.h @@ -0,0 +1,47 @@ +#ifndef VP10_COMMON_ODINTRIN_H_ +#define VP10_COMMON_ODINTRIN_H_ + +#include "vp10/common/enums.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_ports/bitops.h" + +/*Smallest blocks are 4x4*/ +# define OD_LOG_BSIZE0 (2) +/*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/ +# define OD_NBSIZES (5) +/*The log of the maximum length of the side of a block.*/ +# define OD_LOG_BSIZE_MAX (OD_LOG_BSIZE0 + OD_NBSIZES - 1) +/*The maximum length of the side of a block.*/ +# define OD_BSIZE_MAX (1 << OD_LOG_BSIZE_MAX) + +typedef int od_coeff; + +typedef int16_t od_dering_in; + +# define OD_DIVU_DMAX (1024) + +extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2]; + +/*Translate unsigned division by small divisors into multiplications. _d must be in [1, OD_DIVU_DMAX]: it indexes OD_DIVU_SMALL_CONSTS[(_d)-1] and OD_ILOG(_d)-1 is used as a shift count, so _d == 0 is not handled.*/ +# define OD_DIVU_SMALL(_x, _d) \ + ((uint32_t)((OD_DIVU_SMALL_CONSTS[(_d)-1][0]* \ + (uint64_t)(_x)+OD_DIVU_SMALL_CONSTS[(_d)-1][1])>>32)>> \ + (OD_ILOG(_d)-1)) +/*Unsigned division of _x by _d: uses OD_DIVU_SMALL() when _d < OD_DIVU_DMAX and a plain divide otherwise. There is no guard for _d == 0.*/ +# define OD_DIVU(_x, _d) \ + (((_d) < OD_DIVU_DMAX)?(OD_DIVU_SMALL((_x), (_d))):((_x)/(_d))) + +#define OD_MINI VPXMIN +#define OD_CLAMPI(min, val, max) clamp((val), (min), (max)) + +# define OD_CLZ0 (1) +# define OD_CLZ(x) (-get_msb(x)) +# define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x)) +/*Note that __builtin_clz is not defined when x == 0, according to the gcc + documentation (and that of the x86 BSR instruction that implements it), so + we have to special-case it. + We define a special version of the macro to use when x can be zero.*/ +# define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0) + +#endif /* VP10_COMMON_ODINTRIN_H_ */ diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index 1f0c1bf4f..dfa04b594 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -368,6 +368,9 @@ typedef struct VP10Common { BLOCK_SIZE sb_size; // Size of the superblock used for this frame int mib_size; // Size of the superblock in units of MI blocks int mib_size_log2; // Log 2 of above. 
+#if CONFIG_DERING + int dering_level; +#endif } VP10_COMMON; // TODO(hkuang): Don't need to lock the whole pool after implementing atomic diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h index a76735266..12dc6914d 100644 --- a/vp10/common/vp10_txfm.h +++ b/vp10/common/vp10_txfm.h @@ -95,7 +95,7 @@ static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1, printf( "%s overflow result_32: %d result_64: %lld w0: %d in0: %d w1: %d in1: " "%d\n", - __func__, result_32, (long long int)result_64, w0, in0, w1, in1); + __func__, result_32, ((long long int)result_64), w0, in0, w1, in1); assert(0 && "half_btf overflow"); } #endif diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index c1eaed7cd..f2f8ebbab 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -14,6 +14,7 @@ #include "./vp10_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" +#include "./vpx_config.h" #include "vpx_dsp/bitreader_buffer.h" #include "vp10/decoder/bitreader.h" @@ -29,6 +30,9 @@ #include "vp10/common/clpf.h" #endif #include "vp10/common/common.h" +#if CONFIG_DERING +#include "vp10/common/dering.h" +#endif // CONFIG_DERING #include "vp10/common/entropy.h" #include "vp10/common/entropymode.h" #include "vp10/common/idct.h" @@ -1776,6 +1780,16 @@ static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd, if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh); +#if DERING_REFINEMENT + if (bsize == BLOCK_64X64) { + if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) { + cm->mi_grid_visible[mi_row*cm->mi_stride + mi_col]->mbmi.dering_gain = + vpx_read_literal(r, DERING_REFINEMENT_BITS); + } else { + cm->mi_grid_visible[mi_row*cm->mi_stride + mi_col]->mbmi.dering_gain = 0; + } + } +#endif // DERING_REFINEMENT #endif // CONFIG_EXT_PARTITION_TYPES } @@ -1951,6 +1965,12 @@ static void 
setup_clpf(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { } #endif +#if CONFIG_DERING +static void setup_dering(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { /* Reads the frame-level dering level from the uncompressed header as a DERING_LEVEL_BITS-bit literal. */ + cm->dering_level = vpx_rb_read_literal(rb, DERING_LEVEL_BITS); +} +#endif // CONFIG_DERING + static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { return vpx_rb_read_bit(rb) ? vpx_rb_read_inv_signed_literal(rb, 6) : 0; } @@ -2706,6 +2726,11 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, const uint8_t *data, if (cm->clpf && !cm->skip_loop_filter) vp10_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb); #endif +#if CONFIG_DERING + if (cm->dering_level && !cm->skip_loop_filter) { + vp10_dering_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->dering_level); + } +#endif // CONFIG_DERING if (cm->frame_parallel_decode) vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX); @@ -3242,6 +3267,9 @@ static size_t read_uncompressed_header(VP10Decoder *pbi, #if CONFIG_CLPF setup_clpf(cm, rb); #endif +#if CONFIG_DERING + setup_dering(cm, rb); +#endif /* CONFIG_DERING */ #if CONFIG_LOOP_RESTORATION setup_restoration(cm, rb); #endif // CONFIG_LOOP_RESTORATION diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index fb101af10..d5bf02c34 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -23,6 +23,9 @@ #if CONFIG_CLPF #include "vp10/common/clpf.h" #endif +#if CONFIG_DERING +#include "vp10/common/dering.h" +#endif // CONFIG_DERING #include "vp10/common/entropy.h" #include "vp10/common/entropymode.h" #include "vp10/common/entropymv.h" @@ -1875,6 +1878,15 @@ static void write_modes_sb(VP10_COMP *const cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) update_partition_context(xd, mi_row, mi_col, subsize, bsize); + +#if DERING_REFINEMENT + if (bsize == BLOCK_64X64 && cm->dering_level != 0 && + !sb_all_skip(cm, mi_row, mi_col)) { + vpx_write_literal( + w, cm->mi_grid_visible[mi_row*cm->mi_stride + mi_col]->mbmi.dering_gain, + 
DERING_REFINEMENT_BITS); + } +#endif /* DERING_REFINEMENT */ #endif // CONFIG_EXT_PARTITION_TYPES } @@ -2447,6 +2459,12 @@ static void encode_clpf(const VP10_COMMON *cm, } #endif +#if CONFIG_DERING +static void encode_dering(int level, struct vpx_write_bit_buffer *wb) { /* Writes level to the uncompressed header as a DERING_LEVEL_BITS-bit literal. */ + vpx_wb_write_literal(wb, level, DERING_LEVEL_BITS); +} +#endif // CONFIG_DERING + static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) { if (delta_q != 0) { vpx_wb_write_bit(wb, 1); @@ -3103,6 +3121,9 @@ static void write_uncompressed_header(VP10_COMP *cpi, #if CONFIG_CLPF encode_clpf(cm, wb); #endif +#if CONFIG_DERING + encode_dering(cm->dering_level, wb); +#endif // CONFIG_DERING #if CONFIG_LOOP_RESTORATION encode_restoration(cm, wb); #endif // CONFIG_LOOP_RESTORATION diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index afe8dc5d5..823c8610a 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -18,6 +18,9 @@ #if CONFIG_CLPF #include "vp10/common/clpf.h" #endif +#if CONFIG_DERING +#include "vp10/common/dering.h" +#endif // CONFIG_DERING #include "vp10/common/filter.h" #include "vp10/common/idct.h" #include "vp10/common/reconinter.h" @@ -382,7 +385,6 @@ void vp10_initialize_enc(void) { vp10_init_me_luts(); vp10_rc_init_minq_luts(); vp10_entropy_mv_init(); - vp10_temporal_filter_init(); vp10_encode_token_init(); #if CONFIG_EXT_INTER vp10_init_wedge_masks(); @@ -3343,6 +3345,15 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) { vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); #endif } +#if CONFIG_DERING + if (is_lossless_requested(&cpi->oxcf)) { + cm->dering_level = 0; + } else { + cm->dering_level = vp10_dering_search(cm->frame_to_show, cpi->Source, cm, + xd); + vp10_dering_frame(cm->frame_to_show, cm, xd, cm->dering_level); + } +#endif // CONFIG_DERING #if CONFIG_CLPF cm->clpf = 0; diff --git a/vp10/encoder/pickdering.c b/vp10/encoder/pickdering.c new file mode 100644 index 000000000..5a185ee70 --- /dev/null +++ 
b/vp10/encoder/pickdering.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_scale_rtcd.h" +#include "vp10/common/dering.h" +#include "vp10/common/onyxc_int.h" +#include "vp10/common/reconinter.h" +#include "vp10/encoder/encoder.h" +#include "vpx/vpx_integer.h" + +static double compute_dist(int16_t *x, int xstride, int16_t *y, int ystride, + int nhb, int nvb, int coeff_shift) { + int i, j; + double sum; + sum = 0; + for (i = 0; i < nvb << 3; i++) { + for (j = 0; j < nhb << 3; j++) { + double tmp; + tmp = x[i*xstride + j] - y[i*ystride + j]; + sum += tmp*tmp; + } + } + return sum/(double)(1 << 2*coeff_shift); +} + +int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, + VP10_COMMON *cm, + MACROBLOCKD *xd) { + int r, c; + int sbr, sbc; + int nhsb, nvsb; + od_dering_in *src; + int16_t *ref_coeff; + unsigned char *bskip; + int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = {{0}}; + int stride; + int bsize[3]; + int dec[3]; + int pli; + int (*mse)[MAX_DERING_LEVEL]; + int best_count[MAX_DERING_LEVEL] = {0}; + double tot_mse[MAX_DERING_LEVEL] = {0}; + int level; + int best_level; + int global_level; + double best_tot_mse = 1e15; + int coeff_shift = VPXMAX(cm->bit_depth - 8, 0); + src = vpx_malloc(sizeof(*src)*cm->mi_rows*cm->mi_cols*64); + ref_coeff = vpx_malloc(sizeof(*ref_coeff)*cm->mi_rows*cm->mi_cols*64); + bskip = vpx_malloc(sizeof(*bskip)*cm->mi_rows*cm->mi_cols); + vp10_setup_dst_planes(xd->plane, frame, 0, 0); + for (pli = 0; pli < 3; pli++) { + dec[pli] = xd->plane[pli].subsampling_x; + bsize[pli] = 8 >> dec[pli]; + } + stride = 
bsize[0]*cm->mi_cols; + for (r = 0; r < bsize[0]*cm->mi_rows; ++r) { + for (c = 0; c < bsize[0]*cm->mi_cols; ++c) { +#if CONFIG_VPX_HIGHBITDEPTH + if (cm->use_highbitdepth) { + src[r * stride + c] = + CONVERT_TO_SHORTPTR(xd->plane[0].dst.buf) + [r*xd->plane[0].dst.stride + c]; + ref_coeff[r * stride + c] = + CONVERT_TO_SHORTPTR(ref->y_buffer)[r * ref->y_stride + c]; + } else { +#endif + src[r * stride + c] = + xd->plane[0].dst.buf[r*xd->plane[0].dst.stride + c]; + ref_coeff[r * stride + c] = ref->y_buffer[r * ref->y_stride + c]; +#if CONFIG_VPX_HIGHBITDEPTH + } +#endif + } + } + for (r = 0; r < cm->mi_rows; ++r) { + for (c = 0; c < cm->mi_cols; ++c) { + const MB_MODE_INFO *mbmi = + &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi; + bskip[r * cm->mi_cols + c] = mbmi->skip; + } + } + nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1)/MAX_MIB_SIZE; + nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1)/MAX_MIB_SIZE; + mse = vpx_malloc(nvsb*nhsb*sizeof(*mse)); + for (sbr = 0; sbr < nvsb; sbr++) { + for (sbc = 0; sbc < nhsb; sbc++) { + int best_mse = 1000000000; + int nvb, nhb; + int16_t dst[MAX_MIB_SIZE*MAX_MIB_SIZE*8*8]; + best_level = 0; + nhb = VPXMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE*sbc); + nvb = VPXMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE*sbr); + for (level = 0; level < 64; level++) { + int threshold; + threshold = level << coeff_shift; + od_dering( + &OD_DERING_VTBL_C, + dst, + MAX_MIB_SIZE*bsize[0], + &src[sbr*stride*bsize[0]*MAX_MIB_SIZE + + sbc*bsize[0]*MAX_MIB_SIZE], + cm->mi_cols*bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0, dir, 0, + &bskip[MAX_MIB_SIZE*sbr*cm->mi_cols + MAX_MIB_SIZE*sbc], + cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift); + mse[nhsb*sbr+sbc][level] = (int)compute_dist( + dst, MAX_MIB_SIZE*bsize[0], + &ref_coeff[sbr*stride*bsize[0]*MAX_MIB_SIZE + + sbc*bsize[0]*MAX_MIB_SIZE], + stride, nhb, nvb, coeff_shift); + tot_mse[level] += mse[nhsb*sbr+sbc][level]; + if (mse[nhsb*sbr+sbc][level] < best_mse) { + best_mse = 
mse[nhsb*sbr+sbc][level]; + best_level = level; + } + } + best_count[best_level]++; + } + } +#if DERING_REFINEMENT + best_level = 0; + /* Search for the best global level one value at a time. */ + for (global_level = 2; global_level < MAX_DERING_LEVEL; global_level++) { + double tot_mse = 0; + for (sbr = 0; sbr < nvsb; sbr++) { + for (sbc = 0; sbc < nhsb; sbc++) { + int gi; + int best_mse = mse[nhsb*sbr+sbc][0]; + for (gi = 1; gi < 4; gi++) { + level = compute_level_from_index(global_level, gi); + if (mse[nhsb*sbr+sbc][level] < best_mse) { + best_mse = mse[nhsb*sbr+sbc][level]; + } + } + tot_mse += best_mse; + } + } + if (tot_mse < best_tot_mse) { + best_level = global_level; + best_tot_mse = tot_mse; + } + } + for (sbr = 0; sbr < nvsb; sbr++) { + for (sbc = 0; sbc < nhsb; sbc++) { + int gi; + int best_gi; + int best_mse = mse[nhsb*sbr+sbc][0]; + best_gi = 0; + for (gi = 1; gi < DERING_REFINEMENT_LEVELS; gi++) { + level = compute_level_from_index(best_level, gi); + if (mse[nhsb*sbr+sbc][level] < best_mse) { + best_gi = gi; + best_mse = mse[nhsb*sbr+sbc][level]; + } + } + cm->mi_grid_visible[MAX_MIB_SIZE*sbr*cm->mi_stride + MAX_MIB_SIZE*sbc]-> + mbmi.dering_gain = best_gi; + } + } +#else + best_level = 0; + for (level = 0; level < MAX_DERING_LEVEL; level++) { + if (tot_mse[level] < tot_mse[best_level]) best_level = level; + } +#endif + vpx_free(src); + vpx_free(ref_coeff); + vpx_free(bskip); + vpx_free(mse); + return best_level; +} diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c index a22f3b536..cc484a8ea 100644 --- a/vp10/encoder/temporal_filter.c +++ b/vp10/encoder/temporal_filter.c @@ -15,6 +15,7 @@ #include "vp10/common/onyxc_int.h" #include "vp10/common/quant_common.h" #include "vp10/common/reconinter.h" +#include "vp10/common/odintrin.h" #include "vp10/encoder/extend.h" #include "vp10/encoder/firstpass.h" #include "vp10/encoder/mcomp.h" @@ -29,8 +30,6 @@ #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" -static int 
fixed_divide[512]; - static void temporal_filter_predictors_mb_c( MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col, @@ -92,13 +91,6 @@ static void temporal_filter_predictors_mb_c( which_mv, interp_filter, mv_precision_uv, x, y); } -void vp10_temporal_filter_init(void) { - int i; - - fixed_divide[0] = 0; - for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i; -} - void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, @@ -443,11 +435,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, byte = mb_y_offset; for (i = 0, k = 0; i < 16; i++) { for (j = 0; j < 16; j++, k++) { - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - - dst1_16[byte] = (uint16_t)pval; + dst1_16[byte] = + (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // move to next pixel byte++; @@ -467,16 +456,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, int m = k + 256; // U - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - dst1_16[byte] = (uint16_t)pval; + dst1_16[byte] = + (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // V - pval = accumulator[m] + (count[m] >> 1); - pval *= fixed_divide[count[m]]; - pval >>= 19; - dst2_16[byte] = (uint16_t)pval; + dst2_16[byte] = + (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); // move to next pixel byte++; @@ -491,11 +476,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, byte = mb_y_offset; for (i = 0, k = 0; i < 16; i++) { for (j = 0; j < 16; j++, k++) { - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // move to next 
pixel byte++; @@ -512,16 +494,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, int m = k + 256; // U - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // V - pval = accumulator[m] + (count[m] >> 1); - pval *= fixed_divide[count[m]]; - pval >>= 19; - dst2[byte] = (uint8_t)pval; + dst2[byte] = + (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); // move to next pixel byte++; @@ -536,11 +514,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, byte = mb_y_offset; for (i = 0, k = 0; i < 16; i++) { for (j = 0; j < 16; j++, k++) { - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // move to next pixel byte++; @@ -557,16 +532,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, int m = k + 256; // U - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // V - pval = accumulator[m] + (count[m] >> 1); - pval *= fixed_divide[count[m]]; - pval >>= 19; - dst2[byte] = (uint8_t)pval; + dst2[byte] = + (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); // move to next pixel byte++; diff --git a/vp10/encoder/temporal_filter.h b/vp10/encoder/temporal_filter.h index 6e331e6ad..ce5291a53 100644 --- a/vp10/encoder/temporal_filter.h +++ b/vp10/encoder/temporal_filter.h @@ -15,7 +15,6 @@ extern "C" { #endif -void vp10_temporal_filter_init(void); void vp10_temporal_filter(VP10_COMP *cpi, int distance); #ifdef __cplusplus diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk index 19fe6ca37..e25cdcf4f 100644 --- a/vp10/vp10_common.mk +++ b/vp10/vp10_common.mk @@ 
-91,6 +91,14 @@ VP10_COMMON_SRCS-yes += common/warped_motion.c endif VP10_COMMON_SRCS-yes += common/clpf.c VP10_COMMON_SRCS-yes += common/clpf.h +ifeq ($(CONFIG_DERING),yes) +VP10_COMMON_SRCS-yes += common/od_dering.c +VP10_COMMON_SRCS-yes += common/od_dering.h +VP10_COMMON_SRCS-yes += common/dering.c +VP10_COMMON_SRCS-yes += common/dering.h +endif +VP10_COMMON_SRCS-yes += common/odintrin.c +VP10_COMMON_SRCS-yes += common/odintrin.h ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk index cb9e1084e..6764ac898 100644 --- a/vp10/vp10cx.mk +++ b/vp10/vp10cx.mk @@ -90,7 +90,9 @@ VP10_CX_SRCS-yes += encoder/temporal_filter.c VP10_CX_SRCS-yes += encoder/temporal_filter.h VP10_CX_SRCS-yes += encoder/mbgraph.c VP10_CX_SRCS-yes += encoder/mbgraph.h - +ifeq ($(CONFIG_DERING),yes) +VP10_CX_SRCS-yes += encoder/pickdering.c +endif VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -- 2.50.0