zig_zag1 = vld1q_u16(inv_zig_zag + 8);
int16x8_t x0, x1, sz0, sz1, y0, y1;
uint16x8_t eob0, eob1;
+#ifndef __aarch64__
uint16x4_t eob_d16;
uint32x2_t eob_d32;
uint32x4_t eob_q32;
+#endif // __aarch64__
/* sign of z: z >> 15 */
sz0 = vshrq_n_s16(z0, 15);
/* select the largest value */
eob0 = vmaxq_u16(eob0, eob1);
+#ifdef __aarch64__
+ *d->eob = (int8_t)vmaxvq_u16(eob0);
+#else
eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0));
eob_q32 = vmovl_u16(eob_d16);
eob_d32 = vmax_u32(vget_low_u32(eob_q32), vget_high_u32(eob_q32));
eob_d32 = vpmax_u32(eob_d32, eob_d32);
+ vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
+#endif // __aarch64__
+
/* qcoeff = x */
vst1q_s16(d->qcoeff, x0);
vst1q_s16(d->qcoeff + 8, x1);
/* dqcoeff = x * dequant */
vst1q_s16(d->dqcoeff, vmulq_s16(dequant0, x0));
vst1q_s16(d->dqcoeff + 8, vmulq_s16(dequant1, x1));
-
- vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
}
store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff);
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_s16(v_eobmax_76543210);
+#else
{
const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
vget_high_s16(v_eobmax_76543210));
*eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
}
+#endif // __aarch64__
}
static INLINE int32x4_t extract_sign_bit(int32x4_t a) {
dqcoeff_ptr += 8;
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_u16(eob_max);
+#else
{
const uint16x4_t eob_max_0 =
vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max));
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
+#endif // __aarch64__
}
}
} while (n_coeffs > 0);
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_u16(eob_max);
+#else
{
const uint16x4_t eob_max_0 =
vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max));
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
+#endif // __aarch64__
}
static INLINE int32x4_t extract_sign_bit(int32x4_t a) {
}
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_u16(eob_max);
+#else
{
const uint16x4_t eob_max_0 =
vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max));
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
+#endif // __aarch64__
}