From 7971846a5ef42d22385a4dffd1a2aa0b6ee93728 Mon Sep 17 00:00:00 2001 From: hui su Date: Sun, 19 Jul 2015 15:02:56 -0700 Subject: [PATCH] Move intra prediction functions from vp9/common/ to vpx_dsp/ Change-Id: I64edc26cf4aab050c83f2d393df6250628ad43b8 --- build/make/rtcd.pl | 2 + test/test_intra_pred_speed.cc | 2 +- test/vp9_intrapred_test.cc | 2 +- vp9/common/vp9_common.h | 14 - vp9/common/vp9_reconintra.c | 679 +---------------- vp9/common/vp9_rtcd_defs.pl | 318 -------- vp9/vp9_common.mk | 12 - .../arm/intrapred_neon.c | 2 +- .../arm/intrapred_neon_asm.asm | 0 vpx_dsp/intrapred.c | 692 ++++++++++++++++++ vpx_dsp/mips/common_dspr2.h | 2 + .../mips/intrapred16_dspr2.c | 7 +- .../mips/intrapred4_dspr2.c | 25 +- .../mips/intrapred8_dspr2.c | 9 +- .../mips/intrapred_msa.c | 2 +- vpx_dsp/vpx_dsp.mk | 21 + vpx_dsp/vpx_dsp_common.h | 14 + vpx_dsp/vpx_dsp_rtcd_defs.pl | 318 ++++++++ .../x86/highbd_intrapred_sse2.asm | 0 .../x86/intrapred_sse2.asm | 0 .../x86/intrapred_ssse3.asm | 0 21 files changed, 1078 insertions(+), 1043 deletions(-) rename vp9/common/arm/neon/vp9_reconintra_neon.c => vpx_dsp/arm/intrapred_neon.c (99%) rename vp9/common/arm/neon/vp9_reconintra_neon_asm.asm => vpx_dsp/arm/intrapred_neon_asm.asm (100%) create mode 100644 vpx_dsp/intrapred.c rename vp9/common/mips/dspr2/vp9_intrapred16_dspr2.c => vpx_dsp/mips/intrapred16_dspr2.c (98%) rename vp9/common/mips/dspr2/vp9_intrapred4_dspr2.c => vpx_dsp/mips/intrapred4_dspr2.c (95%) rename vp9/common/mips/dspr2/vp9_intrapred8_dspr2.c => vpx_dsp/mips/intrapred8_dspr2.c (99%) rename vp9/common/mips/msa/vp9_intra_predict_msa.c => vpx_dsp/mips/intrapred_msa.c (99%) rename vp9/common/x86/vp9_high_intrapred_sse2.asm => vpx_dsp/x86/highbd_intrapred_sse2.asm (100%) rename vp9/common/x86/vp9_intrapred_sse2.asm => vpx_dsp/x86/intrapred_sse2.asm (100%) rename vp9/common/x86/vp9_intrapred_ssse3.asm => vpx_dsp/x86/intrapred_ssse3.asm (100%) diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl index 6753ee776..7dff0280e 
100755 --- a/build/make/rtcd.pl +++ b/build/make/rtcd.pl @@ -319,6 +319,8 @@ EOF print < 0 ? get_msb(num_values) + 1 : 0; } -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE uint16_t clip_pixel_highbd(int val, int bd) { - switch (bd) { - case 8: - default: - return (uint16_t)clamp(val, 0, 255); - case 10: - return (uint16_t)clamp(val, 0, 1023); - case 12: - return (uint16_t)clamp(val, 0, 4095); - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - #if CONFIG_DEBUG #define CHECK_MEM_ERROR(cm, lval, expr) do { \ lval = (expr); \ diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index f969ff1a8..6dc83c901 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -9,7 +9,7 @@ */ #include "./vpx_config.h" -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -50,683 +50,6 @@ static const uint8_t extend_modes[INTRA_MODES] = { NEED_LEFT | NEED_ABOVE, // TM }; -// This serves as a wrapper function, so that all the prediction functions -// can be unified and accessed as a pointer array. Note that the boundary -// above and left are not necessarily used all the time. 
-#define intra_pred_sized(type, size) \ - void vp9_##type##_predictor_##size##x##size##_c(uint8_t *dst, \ - ptrdiff_t stride, \ - const uint8_t *above, \ - const uint8_t *left) { \ - type##_predictor(dst, stride, size, above, left); \ - } - -#if CONFIG_VP9_HIGHBITDEPTH -#define intra_pred_highbd_sized(type, size) \ - void vp9_highbd_##type##_predictor_##size##x##size##_c( \ - uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \ - const uint16_t *left, int bd) { \ - highbd_##type##_predictor(dst, stride, size, above, left, bd); \ - } - -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 4) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) \ - intra_pred_highbd_sized(type, 4) \ - intra_pred_highbd_sized(type, 8) \ - intra_pred_highbd_sized(type, 16) \ - intra_pred_highbd_sized(type, 32) - -#define intra_pred_no_4x4(type) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) \ - intra_pred_highbd_sized(type, 4) \ - intra_pred_highbd_sized(type, 8) \ - intra_pred_highbd_sized(type, 16) \ - intra_pred_highbd_sized(type, 32) - -#else - -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 4) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) - -#define intra_pred_no_4x4(type) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) -#endif // CONFIG_VP9_HIGHBITDEPTH - -#define DST(x, y) dst[(x) + (y) * stride] -#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) -#define AVG2(a, b) (((a) + (b) + 1) >> 1) - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) above; - (void) bd; - - // First column. - for (r = 0; r < bs - 1; ++r) { - dst[r * stride] = AVG2(left[r], left[r + 1]); - } - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // Second column. 
- for (r = 0; r < bs - 2; ++r) { - dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); - } - dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // Rest of last row. - for (c = 0; c < bs - 2; ++c) - dst[(bs - 1) * stride + c] = left[bs - 1]; - - for (r = bs - 2; r >= 0; --r) { - for (c = 0; c < bs - 2; ++c) - dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; - } -} - -static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], - above[(r >> 1) + c + 2]) - : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); - } - dst += stride; - } -} - -static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, - const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r + c + 2 < bs * 2 ? 
AVG3(above[r + c], above[r + c + 1], - above[r + c + 2]) - : above[bs * 2 - 1]; - } - dst += stride; - } -} - -static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - - // first row - for (c = 0; c < bs; c++) - dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; - - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; - - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); - - // the rest of the block - for (r = 2; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } -} - -static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; ++r) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - - dst += stride; - for (r = 1; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-stride + c - 1]; - dst += stride; - } -} - -static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bs; r++) - dst[r * stride] = AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bs - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], 
above[c + 1]); - dst += stride; - - for (r = 1; r < bs; ++r) { - for (c = 0; c < bs - 2; c++) - dst[c] = dst[-stride + c - 2]; - dst += stride; - } -} - -static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) left; - (void) bd; - for (r = 0; r < bs; r++) { - memcpy(dst, above, bs * sizeof(uint16_t)); - dst += stride; - } -} - -static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) above; - (void) bd; - for (r = 0; r < bs; r++) { - vpx_memset16(dst, left[r], bs); - dst += stride; - } -} - -static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - int ytop_left = above[-1]; - (void) bd; - - for (r = 0; r < bs; r++) { - for (c = 0; c < bs; c++) - dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd); - dst += stride; - } -} - -static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) above; - (void) left; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, 128 << (bd - 8), bs); - dst += stride; - } -} - -static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - (void) above; - (void) bd; - - for (i = 0; i < bs; i++) - sum += left[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - (void) left; - (void) bd; - - for (i = 0; i < bs; i++) - sum += above[i]; - expected_dc = (sum + (bs >> 1)) 
/ bs; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - const int count = 2 * bs; - (void) bd; - - for (i = 0; i < bs; i++) { - sum += above[i]; - sum += left[i]; - } - - expected_dc = (sum + (count >> 1)) / count; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - (void)above; - DST(0, 0) = AVG2(I, J); - DST(2, 0) = DST(0, 1) = AVG2(J, K); - DST(2, 1) = DST(0, 2) = AVG2(K, L); - DST(1, 0) = AVG3(I, J, K); - DST(3, 0) = DST(1, 1) = AVG3(J, K, L); - DST(3, 1) = DST(1, 2) = AVG3(K, L, L); - DST(3, 2) = DST(2, 2) = - DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; -} - -static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) above; - // first column - for (r = 0; r < bs - 1; ++r) - dst[r * stride] = AVG2(left[r], left[r + 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // second column - for (r = 0; r < bs - 2; ++r) - dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); - dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // rest of last row - for (c = 0; c < bs - 2; ++c) - dst[(bs - 1) * stride + c] = left[bs - 1]; - - for (r = bs - 2; r >= 0; --r) - for (c = 0; c < bs - 2; ++c) - dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; -} -intra_pred_no_4x4(d207) - -void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = 
above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - (void)left; - DST(0, 0) = AVG2(A, B); - DST(1, 0) = DST(0, 2) = AVG2(B, C); - DST(2, 0) = DST(1, 2) = AVG2(C, D); - DST(3, 0) = DST(2, 2) = AVG2(D, E); - DST(3, 2) = AVG2(E, F); // differs from vp8 - - DST(0, 1) = AVG3(A, B, C); - DST(1, 1) = DST(0, 3) = AVG3(B, C, D); - DST(2, 1) = DST(1, 3) = AVG3(C, D, E); - DST(3, 1) = DST(2, 3) = AVG3(D, E, F); - DST(3, 3) = AVG3(E, F, G); // differs from vp8 -} - -static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - int size; - (void)left; - for (c = 0; c < bs; ++c) { - dst[c] = AVG2(above[c], above[c + 1]); - dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); - } - for (r = 2, size = bs - 2; r < bs; r += 2, --size) { - memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); - memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); - memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); - memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); - } -} -intra_pred_no_4x4(d63) - -void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)stride; - (void)left; - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); - DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); - DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); - DST(3, 2) = DST(2, 3) = AVG3(F, G, H); - DST(3, 3) = H; // differs from vp8 -} - -static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t 
*left) { - const uint8_t above_right = above[bs - 1]; - const uint8_t *const dst_row0 = dst; - int x, size; - (void)left; - - for (x = 0; x < bs - 1; ++x) { - dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); - } - dst[bs - 1] = above_right; - dst += stride; - for (x = 1, size = bs - 2; x < bs; ++x, --size) { - memcpy(dst, dst_row0 + x, size); - memset(dst + size, above_right, x + 1); - dst += stride; - } -} -intra_pred_no_4x4(d45) - -void vp9_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - DST(0, 0) = DST(1, 2) = AVG2(X, A); - DST(1, 0) = DST(2, 2) = AVG2(A, B); - DST(2, 0) = DST(3, 2) = AVG2(B, C); - DST(3, 0) = AVG2(C, D); - - DST(0, 3) = AVG3(K, J, I); - DST(0, 2) = AVG3(J, I, X); - DST(0, 1) = DST(1, 3) = AVG3(I, X, A); - DST(1, 1) = DST(2, 3) = AVG3(X, A, B); - DST(2, 1) = DST(3, 3) = AVG3(A, B, C); - DST(3, 1) = AVG3(B, C, D); -} - -static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - - // first row - for (c = 0; c < bs; c++) - dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; - - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; - - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); - - // the rest of the block - for (r = 2; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } -} -intra_pred_no_4x4(d117) - -void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - 
const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - (void)stride; - DST(0, 3) = AVG3(J, K, L); - DST(1, 3) = DST(0, 2) = AVG3(I, J, K); - DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); - DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); - DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); - DST(3, 1) = DST(2, 0) = AVG3(C, B, A); - DST(3, 0) = AVG3(D, C, B); -} - -static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; ++r) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - - dst += stride; - for (r = 1; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-stride + c - 1]; - dst += stride; - } -} -intra_pred_no_4x4(d135) - -void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - - DST(0, 0) = DST(2, 1) = AVG2(I, X); - DST(0, 1) = DST(2, 2) = AVG2(J, I); - DST(0, 2) = DST(2, 3) = AVG2(K, J); - DST(0, 3) = AVG2(L, K); - - DST(3, 0) = AVG3(A, B, C); - DST(2, 0) = AVG3(X, A, B); - DST(1, 0) = DST(3, 1) = AVG3(I, X, A); - DST(1, 1) = DST(3, 2) = AVG3(J, I, X); - DST(1, 2) = DST(3, 3) = AVG3(K, J, I); - DST(1, 3) = AVG3(L, K, J); -} - -static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bs; r++) - dst[r * stride] = 
AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bs - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); - dst += stride; - - for (r = 1; r < bs; ++r) { - for (c = 0; c < bs - 2; c++) - dst[c] = dst[-stride + c - 2]; - dst += stride; - } -} -intra_pred_no_4x4(d153) - -static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) left; - - for (r = 0; r < bs; r++) { - memcpy(dst, above, bs); - dst += stride; - } -} -intra_pred_allsizes(v) - -static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) above; - - for (r = 0; r < bs; r++) { - memset(dst, left[r], bs); - dst += stride; - } -} -intra_pred_allsizes(h) - -static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - int ytop_left = above[-1]; - - for (r = 0; r < bs; r++) { - for (c = 0; c < bs; c++) - dst[c] = clip_pixel(left[r] + above[c] - ytop_left); - dst += stride; - } -} -intra_pred_allsizes(tm) - -static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) above; - (void) left; - - for (r = 0; r < bs; r++) { - memset(dst, 128, bs); - dst += stride; - } -} -intra_pred_allsizes(dc_128) - -static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, - const uint8_t *left) { - int i, r, expected_dc, sum = 0; - (void) above; - - for (i = 0; i < bs; i++) - sum += left[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} -intra_pred_allsizes(dc_left) - -static INLINE void 
dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i, r, expected_dc, sum = 0; - (void) left; - - for (i = 0; i < bs; i++) - sum += above[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} -intra_pred_allsizes(dc_top) - -static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i, r, expected_dc, sum = 0; - const int count = 2 * bs; - - for (i = 0; i < bs; i++) { - sum += above[i]; - sum += left[i]; - } - - expected_dc = (sum + (count >> 1)) / count; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} -intra_pred_allsizes(dc) -#undef intra_pred_allsizes - typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left); diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index f80d31ed7..758d10577 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -60,165 +60,6 @@ if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) { $avx2_ssse3 = 'avx2'; } -# -# RECON -# -add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_4x4 neon/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; - -add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; - -add_proto qw/void 
vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d117_predictor_4x4/; - -add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d135_predictor_4x4 neon/; - -add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; - -add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_4x4 neon msa/, "$sse_x86inc"; - -add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; - -add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; - -add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; - -add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; - -add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; - -add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_8x8 neon/, 
"$ssse3_x86inc"; - -add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; - -add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d117_predictor_8x8/; - -add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d135_predictor_8x8/; - -add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; - -add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_8x8 neon msa/, "$sse_x86inc"; - -add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; - -add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; - -add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; - -add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize 
qw/vp9_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; - -add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; - -add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d117_predictor_16x16/; - -add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d135_predictor_16x16/; - -add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; - -add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_16x16 neon msa/, "$sse2_x86inc"; - -add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; - -add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; - -add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; - -add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d117_predictor_32x32/; - -add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d135_predictor_32x32/; - -add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d153_predictor_32x32/, "$ssse3_x86inc"; - -add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc"; - -add_proto qw/void 
vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; - -add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; - -add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; - # # post proc # @@ -438,165 +279,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # High bitdepth functions if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - # - # Intra prediction - # - add_proto qw/void vp9_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d207_predictor_4x4/; - - add_proto qw/void vp9_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d45_predictor_4x4/; - - add_proto qw/void vp9_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d63_predictor_4x4/; - - add_proto qw/void vp9_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_h_predictor_4x4/; - - add_proto qw/void vp9_highbd_d117_predictor_4x4/, "uint16_t *dst, 
ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d117_predictor_4x4/; - - add_proto qw/void vp9_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d135_predictor_4x4/; - - add_proto qw/void vp9_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d153_predictor_4x4/; - - add_proto qw/void vp9_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_v_predictor_4x4/, "$sse_x86inc"; - - add_proto qw/void vp9_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_tm_predictor_4x4/, "$sse_x86inc"; - - add_proto qw/void vp9_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_predictor_4x4/, "$sse_x86inc"; - - add_proto qw/void vp9_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_top_predictor_4x4/; - - add_proto qw/void vp9_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_left_predictor_4x4/; - - add_proto qw/void vp9_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_128_predictor_4x4/; - - add_proto qw/void vp9_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d207_predictor_8x8/; - - add_proto qw/void vp9_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t 
*above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d45_predictor_8x8/; - - add_proto qw/void vp9_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d63_predictor_8x8/; - - add_proto qw/void vp9_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_h_predictor_8x8/; - - add_proto qw/void vp9_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d117_predictor_8x8/; - - add_proto qw/void vp9_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d135_predictor_8x8/; - - add_proto qw/void vp9_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d153_predictor_8x8/; - - add_proto qw/void vp9_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_v_predictor_8x8/, "$sse2_x86inc"; - - add_proto qw/void vp9_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_tm_predictor_8x8/, "$sse2_x86inc"; - - add_proto qw/void vp9_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_predictor_8x8/, "$sse2_x86inc";; - - add_proto qw/void vp9_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_top_predictor_8x8/; - - add_proto qw/void vp9_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - 
specialize qw/vp9_highbd_dc_left_predictor_8x8/; - - add_proto qw/void vp9_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_128_predictor_8x8/; - - add_proto qw/void vp9_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d207_predictor_16x16/; - - add_proto qw/void vp9_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d45_predictor_16x16/; - - add_proto qw/void vp9_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d63_predictor_16x16/; - - add_proto qw/void vp9_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_h_predictor_16x16/; - - add_proto qw/void vp9_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d117_predictor_16x16/; - - add_proto qw/void vp9_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d135_predictor_16x16/; - - add_proto qw/void vp9_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d153_predictor_16x16/; - - add_proto qw/void vp9_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_v_predictor_16x16/, "$sse2_x86inc"; - - add_proto qw/void vp9_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize 
qw/vp9_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc"; - - add_proto qw/void vp9_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_predictor_16x16/, "$sse2_x86inc"; - - add_proto qw/void vp9_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_top_predictor_16x16/; - - add_proto qw/void vp9_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_left_predictor_16x16/; - - add_proto qw/void vp9_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_128_predictor_16x16/; - - add_proto qw/void vp9_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d207_predictor_32x32/; - - add_proto qw/void vp9_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d45_predictor_32x32/; - - add_proto qw/void vp9_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d63_predictor_32x32/; - - add_proto qw/void vp9_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_h_predictor_32x32/; - - add_proto qw/void vp9_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d117_predictor_32x32/; - - add_proto qw/void vp9_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize 
qw/vp9_highbd_d135_predictor_32x32/; - - add_proto qw/void vp9_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_d153_predictor_32x32/; - - add_proto qw/void vp9_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_v_predictor_32x32/, "$sse2_x86inc"; - - add_proto qw/void vp9_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc"; - - add_proto qw/void vp9_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc"; - - add_proto qw/void vp9_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_top_predictor_32x32/; - - add_proto qw/void vp9_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_left_predictor_32x32/; - - add_proto qw/void vp9_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vp9_highbd_dc_128_predictor_32x32/; - # # Sub Pixel Filters # diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 50c11b9a0..78ea63fa1 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -84,16 +84,11 @@ endif ifeq ($(CONFIG_USE_X86INC),yes) VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm -VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm endif ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_SSE2) += 
common/x86/vp9_high_subpixel_8t_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm -ifeq ($(CONFIG_USE_X86INC),yes) -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_intrapred_sse2.asm -endif endif # common (c) @@ -108,9 +103,6 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_avg_horiz_dspr VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_dspr2.c VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_horiz_dspr2.c VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve8_vert_dspr2.c -VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_intrapred4_dspr2.c -VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_intrapred8_dspr2.c -VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_intrapred16_dspr2.c ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c @@ -135,7 +127,6 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c @@ -174,7 +165,6 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon_asm$(ASM) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM) -VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon_asm$(ASM) else ifeq ($(HAVE_NEON), yes) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_avg_neon.c @@ -194,6 +184,4 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon.c endif # HAVE_NEON endif # HAVE_NEON_ASM -VP9_COMMON_SRCS-$(HAVE_NEON) += 
common/arm/neon/vp9_reconintra_neon.c - $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vpx_dsp/arm/intrapred_neon.c similarity index 99% rename from vp9/common/arm/neon/vp9_reconintra_neon.c rename to vpx_dsp/arm/intrapred_neon.c index 92706bf2c..85b2821c3 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon.c +++ b/vpx_dsp/arm/intrapred_neon.c @@ -10,8 +10,8 @@ #include -#include "./vp9_rtcd.h" #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" //------------------------------------------------------------------------------ diff --git a/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm b/vpx_dsp/arm/intrapred_neon_asm.asm similarity index 100% rename from vp9/common/arm/neon/vp9_reconintra_neon_asm.asm rename to vpx_dsp/arm/intrapred_neon_asm.asm diff --git a/vpx_dsp/intrapred.c b/vpx_dsp/intrapred.c new file mode 100644 index 000000000..2ef115dd1 --- /dev/null +++ b/vpx_dsp/intrapred.c @@ -0,0 +1,692 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" + +#define DST(x, y) dst[(x) + (y) * stride] +#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) +#define AVG2(a, b) (((a) + (b) + 1) >> 1) + +static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) above; + // first column + for (r = 0; r < bs - 1; ++r) + dst[r * stride] = AVG2(left[r], left[r + 1]); + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // second column + for (r = 0; r < bs - 2; ++r) + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // rest of last row + for (c = 0; c < bs - 2; ++c) + dst[(bs - 1) * stride + c] = left[bs - 1]; + + for (r = bs - 2; r >= 0; --r) + for (c = 0; c < bs - 2; ++c) + dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; +} + +static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + int size; + (void)left; + for (c = 0; c < bs; ++c) { + dst[c] = AVG2(above[c], above[c + 1]); + dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); + } + for (r = 2, size = bs - 2; r < bs; r += 2, --size) { + memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); + memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); + memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); + memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); + } +} + +static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + const uint8_t above_right = above[bs - 1]; + const uint8_t *const dst_row0 = dst; + int x, size; + (void)left; + + for (x = 0; x < bs - 1; ++x) { + dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); + } + dst[bs - 1] = 
above_right; + dst += stride; + for (x = 1, size = bs - 2; x < bs; ++x, --size) { + memcpy(dst, dst_row0 + x, size); + memset(dst + size, above_right, x + 1); + dst += stride; + } +} + +static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + + // first row + for (c = 0; c < bs; c++) + dst[c] = AVG2(above[c - 1], above[c]); + dst += stride; + + // second row + dst[0] = AVG3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + dst += stride; + + // the rest of first col + dst[0] = AVG3(above[-1], left[0], left[1]); + for (r = 3; r < bs; ++r) + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); + + // the rest of the block + for (r = 2; r < bs; ++r) { + for (c = 1; c < bs; c++) + dst[c] = dst[-2 * stride + c - 1]; + dst += stride; + } +} + +static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + dst[0] = AVG3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + + dst[stride] = AVG3(above[-1], left[0], left[1]); + for (r = 2; r < bs; ++r) + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + + dst += stride; + for (r = 1; r < bs; ++r) { + for (c = 1; c < bs; c++) + dst[c] = dst[-stride + c - 1]; + dst += stride; + } +} + +static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + dst[0] = AVG2(above[-1], left[0]); + for (r = 1; r < bs; r++) + dst[r * stride] = AVG2(left[r - 1], left[r]); + dst++; + + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); + for (r = 2; r < bs; r++) + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + dst++; + + for (c = 0; c < bs - 2; c++) + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); + dst += 
stride; + + for (r = 1; r < bs; ++r) { + for (c = 0; c < bs - 2; c++) + dst[c] = dst[-stride + c - 2]; + dst += stride; + } +} + +static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r; + (void) left; + + for (r = 0; r < bs; r++) { + memcpy(dst, above, bs); + dst += stride; + } +} + +static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r; + (void) above; + + for (r = 0; r < bs; r++) { + memset(dst, left[r], bs); + dst += stride; + } +} + +static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + int ytop_left = above[-1]; + + for (r = 0; r < bs; r++) { + for (c = 0; c < bs; c++) + dst[c] = clip_pixel(left[r] + above[c] - ytop_left); + dst += stride; + } +} + +static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r; + (void) above; + (void) left; + + for (r = 0; r < bs; r++) { + memset(dst, 128, bs); + dst += stride; + } +} + +static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, + const uint8_t *left) { + int i, r, expected_dc, sum = 0; + (void) above; + + for (i = 0; i < bs; i++) + sum += left[i]; + expected_dc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) { + memset(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int i, r, expected_dc, sum = 0; + (void) left; + + for (i = 0; i < bs; i++) + sum += above[i]; + expected_dc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) { + memset(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int i, r, expected_dc, sum = 0; + const int 
count = 2 * bs; + + for (i = 0; i < bs; i++) { + sum += above[i]; + sum += left[i]; + } + + expected_dc = (sum + (count >> 1)) / count; + + for (r = 0; r < bs; r++) { + memset(dst, expected_dc, bs); + dst += stride; + } +} + +void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + (void)above; + DST(0, 0) = AVG2(I, J); + DST(2, 0) = DST(0, 1) = AVG2(J, K); + DST(2, 1) = DST(0, 2) = AVG2(K, L); + DST(1, 0) = AVG3(I, J, K); + DST(3, 0) = DST(1, 1) = AVG3(J, K, L); + DST(3, 1) = DST(1, 2) = AVG3(K, L, L); + DST(3, 2) = DST(2, 2) = + DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; +} + +void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + (void)left; + DST(0, 0) = AVG2(A, B); + DST(1, 0) = DST(0, 2) = AVG2(B, C); + DST(2, 0) = DST(1, 2) = AVG2(C, D); + DST(3, 0) = DST(2, 2) = AVG2(D, E); + DST(3, 2) = AVG2(E, F); // differs from vp8 + + DST(0, 1) = AVG3(A, B, C); + DST(1, 1) = DST(0, 3) = AVG3(B, C, D); + DST(2, 1) = DST(1, 3) = AVG3(C, D, E); + DST(3, 1) = DST(2, 3) = AVG3(D, E, F); + DST(3, 3) = AVG3(E, F, G); // differs from vp8 +} + +void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)stride; + (void)left; + DST(0, 0) = AVG3(A, B, C); + DST(1, 0) = DST(0, 1) = AVG3(B, C, D); + DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); + DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); + DST(3, 1) = 
DST(2, 2) = DST(1, 3) = AVG3(E, F, G); + DST(3, 2) = DST(2, 3) = AVG3(F, G, H); + DST(3, 3) = H; // differs from vp8 +} + +void vp9_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + DST(0, 0) = DST(1, 2) = AVG2(X, A); + DST(1, 0) = DST(2, 2) = AVG2(A, B); + DST(2, 0) = DST(3, 2) = AVG2(B, C); + DST(3, 0) = AVG2(C, D); + + DST(0, 3) = AVG3(K, J, I); + DST(0, 2) = AVG3(J, I, X); + DST(0, 1) = DST(1, 3) = AVG3(I, X, A); + DST(1, 1) = DST(2, 3) = AVG3(X, A, B); + DST(2, 1) = DST(3, 3) = AVG3(A, B, C); + DST(3, 1) = AVG3(B, C, D); +} + +void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + (void)stride; + DST(0, 3) = AVG3(J, K, L); + DST(1, 3) = DST(0, 2) = AVG3(I, J, K); + DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); + DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); + DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); + DST(3, 1) = DST(2, 0) = AVG3(C, B, A); + DST(3, 0) = AVG3(D, C, B); +} + +void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + + DST(0, 0) = DST(2, 1) = AVG2(I, X); + DST(0, 1) = DST(2, 2) = AVG2(J, I); + DST(0, 2) = DST(2, 3) = AVG2(K, J); + DST(0, 3) = AVG2(L, K); + + DST(3, 0) = AVG3(A, B, C); + DST(2, 0) = AVG3(X, A, B); + DST(1, 0) = 
DST(3, 1) = AVG3(I, X, A); + DST(1, 1) = DST(3, 2) = AVG3(J, I, X); + DST(1, 2) = DST(3, 3) = AVG3(K, J, I); + DST(1, 3) = AVG3(L, K, J); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) above; + (void) bd; + + // First column. + for (r = 0; r < bs - 1; ++r) { + dst[r * stride] = AVG2(left[r], left[r + 1]); + } + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // Second column. + for (r = 0; r < bs - 2; ++r) { + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); + } + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // Rest of last row. + for (c = 0; c < bs - 2; ++c) + dst[(bs - 1) * stride + c] = left[bs - 1]; + + for (r = bs - 2; r >= 0; --r) { + for (c = 0; c < bs - 2; ++c) + dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; + } +} + +static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) left; + (void) bd; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); + } + dst += stride; + } +} + +static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) left; + (void) bd; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = r + c + 2 < bs * 2 ? 
AVG3(above[r + c], above[r + c + 1], + above[r + c + 2]) + : above[bs * 2 - 1]; + } + dst += stride; + } +} + +static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) bd; + + // first row + for (c = 0; c < bs; c++) + dst[c] = AVG2(above[c - 1], above[c]); + dst += stride; + + // second row + dst[0] = AVG3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + dst += stride; + + // the rest of first col + dst[0] = AVG3(above[-1], left[0], left[1]); + for (r = 3; r < bs; ++r) + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); + + // the rest of the block + for (r = 2; r < bs; ++r) { + for (c = 1; c < bs; c++) + dst[c] = dst[-2 * stride + c - 1]; + dst += stride; + } +} + +static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) bd; + dst[0] = AVG3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + + dst[stride] = AVG3(above[-1], left[0], left[1]); + for (r = 2; r < bs; ++r) + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + + dst += stride; + for (r = 1; r < bs; ++r) { + for (c = 1; c < bs; c++) + dst[c] = dst[-stride + c - 1]; + dst += stride; + } +} + +static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) bd; + dst[0] = AVG2(above[-1], left[0]); + for (r = 1; r < bs; r++) + dst[r * stride] = AVG2(left[r - 1], left[r]); + dst++; + + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); + for (r = 2; r < bs; r++) + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + dst++; + + for (c = 0; c < bs - 2; c++) + dst[c] = AVG3(above[c - 1], above[c], 
above[c + 1]); + dst += stride; + + for (r = 1; r < bs; ++r) { + for (c = 0; c < bs - 2; c++) + dst[c] = dst[-stride + c - 2]; + dst += stride; + } +} + +static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) left; + (void) bd; + for (r = 0; r < bs; r++) { + memcpy(dst, above, bs * sizeof(uint16_t)); + dst += stride; + } +} + +static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) above; + (void) bd; + for (r = 0; r < bs; r++) { + vpx_memset16(dst, left[r], bs); + dst += stride; + } +} + +static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + int ytop_left = above[-1]; + (void) bd; + + for (r = 0; r < bs; r++) { + for (c = 0; c < bs; c++) + dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd); + dst += stride; + } +} + +static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) above; + (void) left; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, 128 << (bd - 8), bs); + dst += stride; + } +} + +static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int i, r, expected_dc, sum = 0; + (void) above; + (void) bd; + + for (i = 0; i < bs; i++) + sum += left[i]; + expected_dc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int i, r, expected_dc, sum = 0; + (void) left; + (void) bd; + + for (i = 0; i < bs; i++) + sum += above[i]; + expected_dc = (sum + (bs >> 1)) 
/ bs; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int i, r, expected_dc, sum = 0; + const int count = 2 * bs; + (void) bd; + + for (i = 0; i < bs; i++) { + sum += above[i]; + sum += left[i]; + } + + expected_dc = (sum + (count >> 1)) / count; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, expected_dc, bs); + dst += stride; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +// This serves as a wrapper function, so that all the prediction functions +// can be unified and accessed as a pointer array. Note that the boundary +// above and left are not necessarily used all the time. +#define intra_pred_sized(type, size) \ + void vp9_##type##_predictor_##size##x##size##_c(uint8_t *dst, \ + ptrdiff_t stride, \ + const uint8_t *above, \ + const uint8_t *left) { \ + type##_predictor(dst, stride, size, above, left); \ + } + +#if CONFIG_VP9_HIGHBITDEPTH +#define intra_pred_highbd_sized(type, size) \ + void vp9_highbd_##type##_predictor_##size##x##size##_c( \ + uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \ + const uint16_t *left, int bd) { \ + highbd_##type##_predictor(dst, stride, size, above, left, bd); \ + } + +#define intra_pred_allsizes(type) \ + intra_pred_sized(type, 4) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) \ + intra_pred_highbd_sized(type, 4) \ + intra_pred_highbd_sized(type, 8) \ + intra_pred_highbd_sized(type, 16) \ + intra_pred_highbd_sized(type, 32) + +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) \ + intra_pred_highbd_sized(type, 4) \ + intra_pred_highbd_sized(type, 8) \ + intra_pred_highbd_sized(type, 16) \ + intra_pred_highbd_sized(type, 32) + +#else +#define intra_pred_allsizes(type) \ + intra_pred_sized(type, 4) \ + 
intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) + +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) +#endif // CONFIG_VP9_HIGHBITDEPTH + +intra_pred_no_4x4(d207) +intra_pred_no_4x4(d63) +intra_pred_no_4x4(d45) +intra_pred_no_4x4(d117) +intra_pred_no_4x4(d135) +intra_pred_no_4x4(d153) +intra_pred_allsizes(v) +intra_pred_allsizes(h) +intra_pred_allsizes(tm) +intra_pred_allsizes(dc_128) +intra_pred_allsizes(dc_left) +intra_pred_allsizes(dc_top) +intra_pred_allsizes(dc) +#undef intra_pred_allsizes diff --git a/vpx_dsp/mips/common_dspr2.h b/vpx_dsp/mips/common_dspr2.h index 8278101dc..7a10bf1c4 100644 --- a/vpx_dsp/mips/common_dspr2.h +++ b/vpx_dsp/mips/common_dspr2.h @@ -21,6 +21,8 @@ extern "C" { #if HAVE_DSPR2 #define CROP_WIDTH 512 +extern uint8_t *vpx_ff_cropTbl; // From "vpx_dsp/mips/intrapred4_dspr2.c" + static INLINE void prefetch_load(const unsigned char *src) { __asm__ __volatile__ ( "pref 0, 0(%[src]) \n\t" diff --git a/vp9/common/mips/dspr2/vp9_intrapred16_dspr2.c b/vpx_dsp/mips/intrapred16_dspr2.c similarity index 98% rename from vp9/common/mips/dspr2/vp9_intrapred16_dspr2.c rename to vpx_dsp/mips/intrapred16_dspr2.c index b0dc496ae..c028e974e 100644 --- a/vp9/common/mips/dspr2/vp9_intrapred16_dspr2.c +++ b/vpx_dsp/mips/intrapred16_dspr2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -7,11 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#include -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" +#include "vpx_dsp/mips/common_dspr2.h" #if HAVE_DSPR2 void vp9_h_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride, diff --git a/vp9/common/mips/dspr2/vp9_intrapred4_dspr2.c b/vpx_dsp/mips/intrapred4_dspr2.c similarity index 95% rename from vp9/common/mips/dspr2/vp9_intrapred4_dspr2.c rename to vpx_dsp/mips/intrapred4_dspr2.c index a53c62381..479b61651 100644 --- a/vp9/common/mips/dspr2/vp9_intrapred4_dspr2.c +++ b/vpx_dsp/mips/intrapred4_dspr2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -7,13 +7,26 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#include -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" +#include "vpx_dsp/mips/common_dspr2.h" #if HAVE_DSPR2 +/* Backing storage for the clamp lookup table: CROP_WIDTH guard entries, 256 identity entries, CROP_WIDTH guard entries. */ uint8_t vpx_ff_cropTbl_a[256 + 2 * CROP_WIDTH]; +/* Points at the identity section of vpx_ff_cropTbl_a once vpx_dsputil_static_init has run; it is assigned nowhere else in this hunk. */ uint8_t *vpx_ff_cropTbl; + +/* Builds the clamp table. The middle 256 entries map i to i, the CROP_WIDTH entries below them are 0 and the CROP_WIDTH entries above them are 255. vpx_ff_cropTbl is then set to the start of the middle section, so vpx_ff_cropTbl[k] yields k clamped to 0..255 for k from -CROP_WIDTH to 255 + CROP_WIDTH. NOTE(review): the tm predictors below read vpx_ff_cropTbl, so this presumably must run before them - confirm where it is called. */ void vpx_dsputil_static_init(void) { + int i; + + for (i = 0; i < 256; i++) vpx_ff_cropTbl_a[i + CROP_WIDTH] = i; + + for (i = 0; i < CROP_WIDTH; i++) { + vpx_ff_cropTbl_a[i] = 0; + vpx_ff_cropTbl_a[i + CROP_WIDTH + 256] = 255; + } + + vpx_ff_cropTbl = &vpx_ff_cropTbl_a[CROP_WIDTH]; +} + void vp9_h_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t tmp1, tmp2, tmp3, tmp4; @@ -91,7 +104,7 @@ void vp9_tm_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride, int32_t resl; int32_t resr; int32_t top_left; - uint8_t *cm = vp9_ff_cropTbl; + uint8_t *cm = vpx_ff_cropTbl; __asm__ __volatile__ ( "ulw %[resl], (%[above]) \n\t" diff --git a/vp9/common/mips/dspr2/vp9_intrapred8_dspr2.c b/vpx_dsp/mips/intrapred8_dspr2.c similarity index 99% rename from vp9/common/mips/dspr2/vp9_intrapred8_dspr2.c rename to vpx_dsp/mips/intrapred8_dspr2.c index 40d93ae35..69559d7c6 100644 --- a/vp9/common/mips/dspr2/vp9_intrapred8_dspr2.c +++ b/vpx_dsp/mips/intrapred8_dspr2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -7,11 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#include -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" +#include "vpx_dsp/mips/common_dspr2.h" #if HAVE_DSPR2 void vp9_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride, @@ -161,7 +158,7 @@ void vp9_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride, int32_t res0, res1, res2, res3; int32_t reshw; int32_t top_left; - uint8_t *cm = vp9_ff_cropTbl; + uint8_t *cm = vpx_ff_cropTbl; __asm__ __volatile__ ( "ulw %[reshw], (%[above]) \n\t" diff --git a/vp9/common/mips/msa/vp9_intra_predict_msa.c b/vpx_dsp/mips/intrapred_msa.c similarity index 99% rename from vp9/common/mips/msa/vp9_intra_predict_msa.c rename to vpx_dsp/mips/intrapred_msa.c index abf2704ca..2b44595a3 100644 --- a/vp9/common/mips/msa/vp9_intra_predict_msa.c +++ b/vpx_dsp/mips/intrapred_msa.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" #define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) { \ diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 2dfb8154f..d6870eed1 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -31,6 +31,27 @@ DSP_SRCS-yes += bitreader_buffer.c DSP_SRCS-yes += bitreader_buffer.h endif +# intra predictions +DSP_SRCS-yes += intrapred.c + +ifeq ($(CONFIG_USE_X86INC),yes) +DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm +DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm +endif + +ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) +ifeq ($(CONFIG_USE_X86INC),yes) +DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm +endif +endif + +DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM) +DSP_SRCS-$(HAVE_NEON) += arm/intrapred_neon.c +DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c +DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred4_dspr2.c +DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred8_dspr2.c +DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c + # loop filters DSP_SRCS-yes += loopfilter.c diff --git 
a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h index 67930367e..1fd7c153a 100644 --- a/vpx_dsp/vpx_dsp_common.h +++ b/vpx_dsp/vpx_dsp_common.h @@ -50,6 +50,20 @@ static INLINE double fclamp(double value, double low, double high) { return value < low ? low : (value > high ? high : value); } +#if CONFIG_VP9_HIGHBITDEPTH +/* Clamps val to the pixel range selected by bd and returns it as uint16_t: 0..255 for bd 8, 0..1023 for bd 10, 0..4095 for bd 12. Any other bd value shares the 8-bit branch because case 8 and default label the same statement. */ static INLINE uint16_t clip_pixel_highbd(int val, int bd) { + switch (bd) { + case 8: + default: + return (uint16_t)clamp(val, 0, 255); + case 10: + return (uint16_t)clamp(val, 0, 1023); + case 12: + return (uint16_t)clamp(val, 0, 4095); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + #ifdef __cplusplus } // extern "C" #endif diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 6dc44234a..d71b2f5d5 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -44,6 +44,324 @@ if ($opts{arch} eq "x86_64") { $avx2_x86_64 = 'avx2'; } +# +# Intra prediction +# +add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_4x4 neon/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_4x4/; + +add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_4x4 
neon/; + +add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_4x4 neon msa/, "$sse_x86inc"; + +add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; + +add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_8x8 neon/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left"; +specialize qw/vp9_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_8x8/; + +add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_8x8/; + +add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_8x8 neon msa/, "$sse_x86inc"; + +add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; + +add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_16x16/; + +add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_16x16/; + +add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_16x16 neon msa/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; + 
+add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; + +add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_32x32/; + +add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_32x32/; + +add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; + +add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize 
qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; + +# High bitdepth functions +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vp9_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d207_predictor_4x4/; + + add_proto qw/void vp9_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d45_predictor_4x4/; + + add_proto qw/void vp9_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d63_predictor_4x4/; + + add_proto qw/void vp9_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_h_predictor_4x4/; + + add_proto qw/void vp9_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d117_predictor_4x4/; + + add_proto qw/void vp9_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d135_predictor_4x4/; + + add_proto qw/void vp9_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const 
uint16_t *left, int bd"; + specialize qw/vp9_highbd_d153_predictor_4x4/; + + add_proto qw/void vp9_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_v_predictor_4x4/, "$sse_x86inc"; + + add_proto qw/void vp9_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_tm_predictor_4x4/, "$sse_x86inc"; + + add_proto qw/void vp9_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_predictor_4x4/, "$sse_x86inc"; + + add_proto qw/void vp9_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_top_predictor_4x4/; + + add_proto qw/void vp9_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_left_predictor_4x4/; + + add_proto qw/void vp9_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_128_predictor_4x4/; + + add_proto qw/void vp9_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d207_predictor_8x8/; + + add_proto qw/void vp9_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d45_predictor_8x8/; + + add_proto qw/void vp9_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d63_predictor_8x8/; + + add_proto qw/void vp9_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize 
qw/vp9_highbd_h_predictor_8x8/; + + add_proto qw/void vp9_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d117_predictor_8x8/; + + add_proto qw/void vp9_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d135_predictor_8x8/; + + add_proto qw/void vp9_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d153_predictor_8x8/; + + add_proto qw/void vp9_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_v_predictor_8x8/, "$sse2_x86inc"; + + add_proto qw/void vp9_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_tm_predictor_8x8/, "$sse2_x86inc"; + + add_proto qw/void vp9_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_predictor_8x8/, "$sse2_x86inc";; + + add_proto qw/void vp9_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_top_predictor_8x8/; + + add_proto qw/void vp9_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_left_predictor_8x8/; + + add_proto qw/void vp9_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_128_predictor_8x8/; + + add_proto qw/void vp9_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize 
qw/vp9_highbd_d207_predictor_16x16/; + + add_proto qw/void vp9_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d45_predictor_16x16/; + + add_proto qw/void vp9_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d63_predictor_16x16/; + + add_proto qw/void vp9_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_h_predictor_16x16/; + + add_proto qw/void vp9_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d117_predictor_16x16/; + + add_proto qw/void vp9_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d135_predictor_16x16/; + + add_proto qw/void vp9_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d153_predictor_16x16/; + + add_proto qw/void vp9_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_v_predictor_16x16/, "$sse2_x86inc"; + + add_proto qw/void vp9_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc"; + + add_proto qw/void vp9_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_predictor_16x16/, "$sse2_x86inc"; + + add_proto qw/void vp9_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize 
qw/vp9_highbd_dc_top_predictor_16x16/; + + add_proto qw/void vp9_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_left_predictor_16x16/; + + add_proto qw/void vp9_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_128_predictor_16x16/; + + add_proto qw/void vp9_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d207_predictor_32x32/; + + add_proto qw/void vp9_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d45_predictor_32x32/; + + add_proto qw/void vp9_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d63_predictor_32x32/; + + add_proto qw/void vp9_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_h_predictor_32x32/; + + add_proto qw/void vp9_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d117_predictor_32x32/; + + add_proto qw/void vp9_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d135_predictor_32x32/; + + add_proto qw/void vp9_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_d153_predictor_32x32/; + + add_proto qw/void vp9_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_v_predictor_32x32/, 
"$sse2_x86inc"; + + add_proto qw/void vp9_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc"; + + add_proto qw/void vp9_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc"; + + add_proto qw/void vp9_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_top_predictor_32x32/; + + add_proto qw/void vp9_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_left_predictor_32x32/; + + add_proto qw/void vp9_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vp9_highbd_dc_128_predictor_32x32/; +} + # # Loopfilter # diff --git a/vp9/common/x86/vp9_high_intrapred_sse2.asm b/vpx_dsp/x86/highbd_intrapred_sse2.asm similarity index 100% rename from vp9/common/x86/vp9_high_intrapred_sse2.asm rename to vpx_dsp/x86/highbd_intrapred_sse2.asm diff --git a/vp9/common/x86/vp9_intrapred_sse2.asm b/vpx_dsp/x86/intrapred_sse2.asm similarity index 100% rename from vp9/common/x86/vp9_intrapred_sse2.asm rename to vpx_dsp/x86/intrapred_sse2.asm diff --git a/vp9/common/x86/vp9_intrapred_ssse3.asm b/vpx_dsp/x86/intrapred_ssse3.asm similarity index 100% rename from vp9/common/x86/vp9_intrapred_ssse3.asm rename to vpx_dsp/x86/intrapred_ssse3.asm -- 2.40.0