vp9_dc_left_predictor_16x16_neon,
vp9_dc_top_predictor_16x16_neon,
vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
- vp9_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
- vp9_tm_predictor_16x16_neon)
+ vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
+ NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
#endif // HAVE_NEON
#if HAVE_MSA
vst1_u8(dst + i * stride, row);
}
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,  // Writes 16 rows of 16 bytes starting at dst, `stride` bytes apart.
+ const uint8_t *above, const uint8_t *left) {  // Only above[0..15] is read; `left` is unused.
+ const uint8x16_t A0 = vld1q_u8(above); // top row: above[0..15]
+ const uint8x16_t above_right = vld1q_dup_u8(above + 15);  // above[15] replicated into all 16 lanes
+ const uint8x16_t A1 = vextq_u8(A0, above_right, 1);  // above[1..15] followed by one copy of above[15]
+ const uint8x16_t A2 = vextq_u8(A0, above_right, 2);  // above[2..15] followed by two copies of above[15]
+ const uint8x16_t avg1 = vhaddq_u8(A0, A2);  // truncating halving add: (A0 + A2) >> 1 per lane
+ uint8x16_t row = vrhaddq_u8(avg1, A1);  // rounding halving add; together with avg1 this equals (A0 + 2*A1 + A2 + 2) >> 2 per lane
+ int i;
+ (void)left;  // silences the unused-parameter warning
+ for (i = 0; i < 15; ++i) {  // rows 0..14; row 15 is stored after the loop so no extra shift is computed
+ vst1q_u8(dst + i * stride, row);  // store the 16 bytes of row i
+ row = vextq_u8(row, above_right, 1);  // next row: drop lane 0, shift left by one byte, fill the last lane with above[15]
+ }
+ vst1q_u8(dst + i * stride, row);  // i == 15 here: store the final row
+}
+
// -----------------------------------------------------------------------------
void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc"; # "neon" added to the qw// list; the "$ssse3_x86inc" argument is unchanged
add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";