;
;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
; Public entry points. Every routine in this file has the C signature
;   void f(uint8_t *dst, ptrdiff_t y_stride,
;          const uint8_t *above, const uint8_t *left)
; and receives its arguments in r0-r3.
    EXPORT  |vp9_v_predictor_4x4_neon|
    EXPORT  |vp9_v_predictor_8x8_neon|
    EXPORT  |vp9_v_predictor_16x16_neon|
    EXPORT  |vp9_v_predictor_32x32_neon|
    EXPORT  |vp9_h_predictor_4x4_neon|
    EXPORT  |vp9_h_predictor_8x8_neon|
    EXPORT  |vp9_h_predictor_16x16_neon|
    EXPORT  |vp9_h_predictor_32x32_neon|
    EXPORT  |vp9_tm_predictor_4x4_neon|
    EXPORT  |vp9_tm_predictor_8x8_neon|
    ARM                 ; assemble as ARM (A32) instructions
    REQUIRE8            ; code expects an 8-byte aligned stack from its callers
    PRESERVE8           ; code keeps the stack 8-byte aligned (sp is never modified)

    AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
;                              const uint8_t *above,
;                              const uint8_t *left)
;
; Vertical prediction, 4x4: each of the 4 output rows is a copy of
; above[0..3]. left is not read.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above
; r3  const uint8_t *left     (unused)
; Clobbers: r0, d0

|vp9_v_predictor_4x4_neon| PROC
    vld1.32             {d0[0]}, [r2]           ; d0[0] = above[0..3]
    vst1.32             {d0[0]}, [r0], r1       ; row 0
    vst1.32             {d0[0]}, [r0], r1       ; row 1
    vst1.32             {d0[0]}, [r0], r1       ; row 2
    vst1.32             {d0[0]}, [r0], r1       ; row 3
    bx                  lr
    ENDP                ; |vp9_v_predictor_4x4_neon|
;void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
;                              const uint8_t *above,
;                              const uint8_t *left)
;
; Vertical prediction, 8x8: each of the 8 output rows is a copy of
; above[0..7]. left is not read.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above
; r3  const uint8_t *left     (unused)
; Clobbers: r0, d0

|vp9_v_predictor_8x8_neon| PROC
    vld1.8              {d0}, [r2]              ; d0 = above[0..7]
    vst1.8              {d0}, [r0], r1          ; row 0
    vst1.8              {d0}, [r0], r1          ; row 1
    vst1.8              {d0}, [r0], r1          ; row 2
    vst1.8              {d0}, [r0], r1          ; row 3
    vst1.8              {d0}, [r0], r1          ; row 4
    vst1.8              {d0}, [r0], r1          ; row 5
    vst1.8              {d0}, [r0], r1          ; row 6
    vst1.8              {d0}, [r0], r1          ; row 7
    bx                  lr
    ENDP                ; |vp9_v_predictor_8x8_neon|
;void vp9_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
;                                const uint8_t *above,
;                                const uint8_t *left)
;
; Vertical prediction, 16x16: each of the 16 output rows is a copy of
; above[0..15]. left is not read.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above
; r3  const uint8_t *left     (unused)
; Clobbers: r0, q0

|vp9_v_predictor_16x16_neon| PROC
    vld1.8              {q0}, [r2]              ; q0 = above[0..15]
    vst1.8              {q0}, [r0], r1          ; row 0
    vst1.8              {q0}, [r0], r1          ; row 1
    vst1.8              {q0}, [r0], r1          ; row 2
    vst1.8              {q0}, [r0], r1          ; row 3
    vst1.8              {q0}, [r0], r1          ; row 4
    vst1.8              {q0}, [r0], r1          ; row 5
    vst1.8              {q0}, [r0], r1          ; row 6
    vst1.8              {q0}, [r0], r1          ; row 7
    vst1.8              {q0}, [r0], r1          ; row 8
    vst1.8              {q0}, [r0], r1          ; row 9
    vst1.8              {q0}, [r0], r1          ; row 10
    vst1.8              {q0}, [r0], r1          ; row 11
    vst1.8              {q0}, [r0], r1          ; row 12
    vst1.8              {q0}, [r0], r1          ; row 13
    vst1.8              {q0}, [r0], r1          ; row 14
    vst1.8              {q0}, [r0], r1          ; row 15
    bx                  lr
    ENDP                ; |vp9_v_predictor_16x16_neon|
;void vp9_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
;                                const uint8_t *above,
;                                const uint8_t *left)
;
; Vertical prediction, 32x32: each of the 32 output rows is a copy of
; above[0..31]. left is not read. The 16-row store sequence is executed
; twice under a counted loop.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above    (reused as the loop counter after the load)
; r3  const uint8_t *left     (unused)
; Clobbers: r0, r2, q0, q1, flags

|vp9_v_predictor_32x32_neon| PROC
    vld1.8              {q0, q1}, [r2]          ; q0:q1 = above[0..31]
    mov                 r2, #2                  ; 2 passes x 16 rows = 32 rows
loop_v
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    subs                r2, r2, #1
    bgt                 loop_v
    bx                  lr
    ENDP                ; |vp9_v_predictor_32x32_neon|
;void vp9_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
;                              const uint8_t *above,
;                              const uint8_t *left)
;
; Horizontal prediction, 4x4: output row r is filled with left[r].
; above is not read.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above    (unused)
; r3  const uint8_t *left
; Clobbers: r0, d0, d1

|vp9_h_predictor_4x4_neon| PROC
    vld1.32             {d1[0]}, [r3]           ; d1 bytes 0..3 = left[0..3]
    vdup.8              d0, d1[0]               ; broadcast left[0]
    vst1.32             {d0[0]}, [r0], r1       ; row 0
    vdup.8              d0, d1[1]
    vst1.32             {d0[0]}, [r0], r1       ; row 1
    vdup.8              d0, d1[2]
    vst1.32             {d0[0]}, [r0], r1       ; row 2
    vdup.8              d0, d1[3]
    vst1.32             {d0[0]}, [r0], r1       ; row 3
    bx                  lr
    ENDP                ; |vp9_h_predictor_4x4_neon|
;void vp9_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
;                              const uint8_t *above,
;                              const uint8_t *left)
;
; Horizontal prediction, 8x8: output row r is filled with left[r].
; above is not read.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above    (unused)
; r3  const uint8_t *left
; Clobbers: r0, d0, d1

|vp9_h_predictor_8x8_neon| PROC
    vld1.64             {d1}, [r3]              ; d1 = left[0..7]
    vdup.8              d0, d1[0]               ; broadcast left[0]
    vst1.64             {d0}, [r0], r1          ; row 0
    vdup.8              d0, d1[1]
    vst1.64             {d0}, [r0], r1          ; row 1
    vdup.8              d0, d1[2]
    vst1.64             {d0}, [r0], r1          ; row 2
    vdup.8              d0, d1[3]
    vst1.64             {d0}, [r0], r1          ; row 3
    vdup.8              d0, d1[4]
    vst1.64             {d0}, [r0], r1          ; row 4
    vdup.8              d0, d1[5]
    vst1.64             {d0}, [r0], r1          ; row 5
    vdup.8              d0, d1[6]
    vst1.64             {d0}, [r0], r1          ; row 6
    vdup.8              d0, d1[7]
    vst1.64             {d0}, [r0], r1          ; row 7
    bx                  lr
    ENDP                ; |vp9_h_predictor_8x8_neon|
;void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
;                                const uint8_t *above,
;                                const uint8_t *left)
;
; Horizontal prediction, 16x16: output row r is filled with left[r].
; above is not read.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above    (unused)
; r3  const uint8_t *left
; Clobbers: r0, q0, q1

|vp9_h_predictor_16x16_neon| PROC
    vld1.8              {q1}, [r3]              ; q1 (d2:d3) = left[0..15]
    vdup.8              q0, d2[0]               ; broadcast left[0]
    vst1.8              {q0}, [r0], r1          ; row 0
    vdup.8              q0, d2[1]
    vst1.8              {q0}, [r0], r1          ; row 1
    vdup.8              q0, d2[2]
    vst1.8              {q0}, [r0], r1          ; row 2
    vdup.8              q0, d2[3]
    vst1.8              {q0}, [r0], r1          ; row 3
    vdup.8              q0, d2[4]
    vst1.8              {q0}, [r0], r1          ; row 4
    vdup.8              q0, d2[5]
    vst1.8              {q0}, [r0], r1          ; row 5
    vdup.8              q0, d2[6]
    vst1.8              {q0}, [r0], r1          ; row 6
    vdup.8              q0, d2[7]
    vst1.8              {q0}, [r0], r1          ; row 7
    vdup.8              q0, d3[0]               ; d3 holds left[8..15]
    vst1.8              {q0}, [r0], r1          ; row 8
    vdup.8              q0, d3[1]
    vst1.8              {q0}, [r0], r1          ; row 9
    vdup.8              q0, d3[2]
    vst1.8              {q0}, [r0], r1          ; row 10
    vdup.8              q0, d3[3]
    vst1.8              {q0}, [r0], r1          ; row 11
    vdup.8              q0, d3[4]
    vst1.8              {q0}, [r0], r1          ; row 12
    vdup.8              q0, d3[5]
    vst1.8              {q0}, [r0], r1          ; row 13
    vdup.8              q0, d3[6]
    vst1.8              {q0}, [r0], r1          ; row 14
    vdup.8              q0, d3[7]
    vst1.8              {q0}, [r0], r1          ; row 15
    bx                  lr
    ENDP                ; |vp9_h_predictor_16x16_neon|
;void vp9_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
;                                const uint8_t *above,
;                                const uint8_t *left)
;
; Horizontal prediction, 32x32: output row r is filled with left[r].
; above is not read. Each row is written as two 16-byte stores: the first
; advances dst by 16, the second by (y_stride - 16), so dst moves exactly
; one row in total. 16 rows are produced per loop pass, 2 passes.
;
; r0  uint8_t *dst
; r1  ptrdiff_t y_stride      (rewritten to y_stride - 16)
; r2  const uint8_t *above    (unused as a pointer; reused as loop counter)
; r3  const uint8_t *left     (post-incremented by 16 per pass)
; Clobbers: r0, r1, r2, r3, q0, q1, flags

|vp9_h_predictor_32x32_neon| PROC
    sub                 r1, r1, #16             ; stride left after the first half-row
    mov                 r2, #2                  ; 2 passes x 16 rows = 32 rows
loop_h
    vld1.8              {q1}, [r3]!             ; q1 (d2:d3) = next 16 left pixels
    vdup.8              q0, d2[0]
    vst1.8              {q0}, [r0]!             ; columns 0..15
    vst1.8              {q0}, [r0], r1          ; columns 16..31, then next row
    vdup.8              q0, d2[1]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[2]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[3]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[4]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[5]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[6]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[7]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[0]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[1]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[2]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[3]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[4]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[5]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[6]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[7]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    subs                r2, r2, #1
    bgt                 loop_h
    bx                  lr
    ENDP                ; |vp9_h_predictor_32x32_neon|
;void vp9_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
;                                const uint8_t *above,
;                                const uint8_t *left)
;
; TrueMotion prediction, 4x4:
;   dst[r][c] = clip_u8(left[r] + above[c] - above[-1])
; The difference (above - above[-1]) is computed once in 16-bit lanes and
; reused for every row; two rows are produced per step.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above    (above[-1] must be readable; reused as scratch)
; r3  const uint8_t *left     (post-incremented by 1 per row)
; Clobbers: r0, r2, r3, r12, q0, q1, q2, q3

|vp9_tm_predictor_4x4_neon| PROC
    ; Load ytop_left = above[-1];
    sub                 r12, r2, #1
    ldrb                r12, [r12]
    vdup.8              d0, r12

    ; Load above 4 pixels (only the low 4 byte lanes of d2 are meaningful;
    ; the remaining lanes are computed but never stored)
    vld1.32             {d2[0]}, [r2]

    ; Compute above - ytop_left (widened to 16 bits, kept in q3)
    vsubl.u8            q3, d2, d0

    ; Load left row by row and compute left + (above - ytop_left)
    ; 1st row and 2nd row
    ldrb                r12, [r3], #1
    ldrb                r2, [r3], #1
    vdup.16             q1, r12
    vdup.16             q2, r2
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqshrun.s16         d0, q1, #0              ; shift 0: saturate s16 -> u8
    vqshrun.s16         d1, q2, #0
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d1[0]}, [r0], r1

    ; 3rd row and 4th row
    ldrb                r12, [r3], #1
    ldrb                r2, [r3], #1
    vdup.16             q1, r12
    vdup.16             q2, r2
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqshrun.s16         d0, q1, #0
    vqshrun.s16         d1, q2, #0
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d1[0]}, [r0], r1
    bx                  lr
    ENDP                ; |vp9_tm_predictor_4x4_neon|
;void vp9_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
;                                const uint8_t *above,
;                                const uint8_t *left)
;
; TrueMotion prediction, 8x8:
;   dst[r][c] = clip_u8(left[r] + above[c] - above[-1])
; The difference (above - above[-1]) is computed once in 16-bit lanes and
; reused for every row; two rows are produced per step.
;
; r0  uint8_t *dst            (post-incremented by y_stride per row)
; r1  ptrdiff_t y_stride
; r2  const uint8_t *above    (above[-1] must be readable; reused as scratch)
; r3  const uint8_t *left     (post-incremented by 1 per row)
; Clobbers: r0, r2, r3, r12, q0, q1, q2, q3

|vp9_tm_predictor_8x8_neon| PROC
    ; Load ytop_left = above[-1];
    sub                 r12, r2, #1
    ldrb                r12, [r12]
    vdup.8              d0, r12

    ; Load above 8 pixels
    vld1.64             {d2}, [r2]

    ; Compute above - ytop_left (widened to 16 bits, kept in q3)
    vsubl.u8            q3, d2, d0

    ; Load left row by row and compute left + (above - ytop_left)
    ; 1st row and 2nd row
    ldrb                r12, [r3], #1
    ldrb                r2, [r3], #1
    vdup.16             q1, r12
    vdup.16             q2, r2
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqshrun.s16         d0, q1, #0              ; shift 0: saturate s16 -> u8
    vqshrun.s16         d1, q2, #0
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1

    ; 3rd row and 4th row
    ldrb                r12, [r3], #1
    ldrb                r2, [r3], #1
    vdup.16             q1, r12
    vdup.16             q2, r2
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqshrun.s16         d0, q1, #0
    vqshrun.s16         d1, q2, #0
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1

    ; 5th row and 6th row
    ldrb                r12, [r3], #1
    ldrb                r2, [r3], #1
    vdup.16             q1, r12
    vdup.16             q2, r2
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqshrun.s16         d0, q1, #0
    vqshrun.s16         d1, q2, #0
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1

    ; 7th row and 8th row
    ldrb                r12, [r3], #1
    ldrb                r2, [r3], #1
    vdup.16             q1, r12
    vdup.16             q2, r2
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqshrun.s16         d0, q1, #0
    vqshrun.s16         d1, q2, #0
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1
    bx                  lr
    ENDP                ; |vp9_tm_predictor_8x8_neon|