2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "libyuv/scale.h"
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
// Returns the magnitude of v.  Negating INT_MIN is not representable, so the
// caller must not pass it.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30 // CPU agnostic row functions
// Row downscaler for 8-bit samples (the name indicates a 2x reduction).
// The loop produces two destination pixels per iteration and stops before a
// possible odd final pixel (x < dst_width - 1).
// NOTE(review): the loop body and the tail are elided in this excerpt, so
// which source pixels are sampled cannot be confirmed from here.
31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
32 uint8* dst, int dst_width) {
34 for (x = 0; x < dst_width - 1; x += 2) {
// Row downscaler for 16-bit samples (uint16 source and destination).
// The loop produces two destination pixels per iteration and stops before a
// possible odd final pixel.
// NOTE(review): the loop body and the tail are elided in this excerpt.
45 void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
46 uint16* dst, int dst_width) {
48 for (x = 0; x < dst_width - 1; x += 2) {
// Horizontal 2:1 linear downscale of one 8-bit row: each destination pixel is
// the rounded average of two adjacent source pixels.  src_stride is not
// referenced in the visible lines.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
59 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
60 uint8* dst, int dst_width) {
61 const uint8* s = src_ptr;
// Two output pixels per iteration.
63 for (x = 0; x < dst_width - 1; x += 2) {
// +1 rounds to nearest before halving.
64 dst[0] = (s[0] + s[1] + 1) >> 1;
65 dst[1] = (s[2] + s[3] + 1) >> 1;
// Same formula once more; presumably the odd-width tail (guard not shown).
70 dst[0] = (s[0] + s[1] + 1) >> 1;
// Horizontal 2:1 linear downscale of one 16-bit row: each destination pixel
// is the rounded average of two adjacent source pixels.  src_stride is not
// referenced in the visible lines.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
74 void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
75 uint16* dst, int dst_width) {
76 const uint16* s = src_ptr;
// Two output pixels per iteration.
78 for (x = 0; x < dst_width - 1; x += 2) {
// +1 rounds to nearest before halving.
79 dst[0] = (s[0] + s[1] + 1) >> 1;
80 dst[1] = (s[2] + s[3] + 1) >> 1;
// Same formula once more; presumably the odd-width tail (guard not shown).
85 dst[0] = (s[0] + s[1] + 1) >> 1;
// 2x2 box downscale for 8-bit samples: each destination pixel is the rounded
// average of a 2x2 block taken from the row at src_ptr and the row
// src_stride elements after it.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
89 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
90 uint8* dst, int dst_width) {
// s walks the first source row, t the following one.
91 const uint8* s = src_ptr;
92 const uint8* t = src_ptr + src_stride;
94 for (x = 0; x < dst_width - 1; x += 2) {
// +2 rounds to nearest before dividing the four-sample sum by 4.
95 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
96 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
// Same formula once more; presumably the odd-width tail (guard not shown).
102 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
// 2x2 box downscale for 16-bit samples: each destination pixel is the rounded
// average of a 2x2 block taken from the row at src_ptr and the row
// src_stride elements after it.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
106 void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
107 uint16* dst, int dst_width) {
// s walks the first source row, t the following one.
108 const uint16* s = src_ptr;
109 const uint16* t = src_ptr + src_stride;
111 for (x = 0; x < dst_width - 1; x += 2) {
// +2 rounds to nearest before dividing the four-sample sum by 4.
112 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
113 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
// Same formula once more; presumably the odd-width tail (guard not shown).
119 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
// Row downscaler for 8-bit samples (the name indicates a 4x reduction).
// The loop produces two destination pixels per iteration and stops before a
// possible odd final pixel.
// NOTE(review): the loop body and the tail are elided in this excerpt, so
// which source pixels are sampled cannot be confirmed from here.
123 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
124 uint8* dst, int dst_width) {
126 for (x = 0; x < dst_width - 1; x += 2) {
// Row downscaler for 16-bit samples (the name indicates a 4x reduction).
// The loop produces two destination pixels per iteration and stops before a
// possible odd final pixel.
// NOTE(review): the loop body and the tail are elided in this excerpt.
137 void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
138 uint16* dst, int dst_width) {
140 for (x = 0; x < dst_width - 1; x += 2) {
// 4x4 box downscale for 8-bit samples.  Each destination pixel starts from
// the sum of 16 source samples: 4 columns across the rows at offsets
// 0, stride, stride * 2 and stride * 3 from src_ptr.
// NOTE(review): the last line of each sum (rounding term and divide), the
// pointer advances and the tail guard are elided in this excerpt.
151 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
152 uint8* dst, int dst_width) {
153 intptr_t stride = src_stride;
// Two output pixels per iteration.
155 for (x = 0; x < dst_width - 1; x += 2) {
// First output: columns 0..3 of the four rows.
156 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
157 src_ptr[stride + 0] + src_ptr[stride + 1] +
158 src_ptr[stride + 2] + src_ptr[stride + 3] +
159 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
160 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
161 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
162 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
// Second output: columns 4..7 of the same four rows.
164 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
165 src_ptr[stride + 4] + src_ptr[stride + 5] +
166 src_ptr[stride + 6] + src_ptr[stride + 7] +
167 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
168 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
169 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
170 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
// Columns 0..3 again; presumably the odd-width tail (guard not shown).
176 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
177 src_ptr[stride + 0] + src_ptr[stride + 1] +
178 src_ptr[stride + 2] + src_ptr[stride + 3] +
179 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
180 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
181 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
182 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
// 4x4 box downscale for 16-bit samples.  Each destination pixel starts from
// the sum of 16 source samples: 4 columns across the rows at offsets
// 0, stride, stride * 2 and stride * 3 from src_ptr.
// NOTE(review): the last line of each sum (rounding term and divide), the
// pointer advances and the tail guard are elided in this excerpt.
187 void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
188 uint16* dst, int dst_width) {
189 intptr_t stride = src_stride;
// Two output pixels per iteration.
191 for (x = 0; x < dst_width - 1; x += 2) {
// First output: columns 0..3 of the four rows.
192 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
193 src_ptr[stride + 0] + src_ptr[stride + 1] +
194 src_ptr[stride + 2] + src_ptr[stride + 3] +
195 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
196 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
197 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
198 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
// Second output: columns 4..7 of the same four rows.
200 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
201 src_ptr[stride + 4] + src_ptr[stride + 5] +
202 src_ptr[stride + 6] + src_ptr[stride + 7] +
203 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
204 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
205 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
206 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
// Columns 0..3 again; presumably the odd-width tail (guard not shown).
212 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
213 src_ptr[stride + 0] + src_ptr[stride + 1] +
214 src_ptr[stride + 2] + src_ptr[stride + 3] +
215 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
216 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
217 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
218 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
// Row downscaler for 8-bit samples (the name indicates a 3/4 reduction).
// dst_width must be a positive multiple of 3; the loop produces three
// destination pixels per iteration.
// NOTE(review): the loop body is elided in this excerpt, so which source
// pixels are sampled cannot be confirmed from here.
223 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
224 uint8* dst, int dst_width) {
226 assert((dst_width % 3 == 0) && (dst_width > 0));
227 for (x = 0; x < dst_width; x += 3) {
// Row downscaler for 16-bit samples (the name indicates a 3/4 reduction).
// dst_width must be a positive multiple of 3; the loop produces three
// destination pixels per iteration.
// NOTE(review): the loop body is elided in this excerpt.
236 void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
237 uint16* dst, int dst_width) {
239 assert((dst_width % 3 == 0) && (dst_width > 0));
240 for (x = 0; x < dst_width; x += 3) {
249 // Filter rows 0 and 1 together, 3 : 1
// 8-bit samples.  Four source pixels in each of two rows (s, and t one
// src_stride below) are first reduced horizontally to three values with
// weights 3:1, 1:1 and 1:3; the two rows are then blended 3:1 in favour of s.
// dst_width must be a positive multiple of 3.
// NOTE(review): the pointer advances at the end of the loop are elided in
// this excerpt.
250 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
251 uint8* d, int dst_width) {
252 const uint8* s = src_ptr;
253 const uint8* t = src_ptr + src_stride;
255 assert((dst_width % 3 == 0) && (dst_width > 0));
256 for (x = 0; x < dst_width; x += 3) {
// Horizontal pass on row s (rounded).
257 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
258 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
259 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
// Horizontal pass on row t (rounded).
260 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
261 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
262 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
// Vertical pass: 3 parts row s, 1 part row t, rounded.
263 d[0] = (a0 * 3 + b0 + 2) >> 2;
264 d[1] = (a1 * 3 + b1 + 2) >> 2;
265 d[2] = (a2 * 3 + b2 + 2) >> 2;
// 16-bit samples.  Four source pixels in each of two rows (s, and t one
// src_stride below) are first reduced horizontally to three values with
// weights 3:1, 1:1 and 1:3; the two rows are then blended 3:1 in favour of s.
// dst_width must be a positive multiple of 3.
// NOTE(review): the pointer advances at the end of the loop are elided in
// this excerpt.
272 void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
273 uint16* d, int dst_width) {
274 const uint16* s = src_ptr;
275 const uint16* t = src_ptr + src_stride;
277 assert((dst_width % 3 == 0) && (dst_width > 0));
278 for (x = 0; x < dst_width; x += 3) {
// Horizontal pass on row s (rounded).
279 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
280 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
281 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
// Horizontal pass on row t (rounded).
282 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
283 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
284 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
// Vertical pass: 3 parts row s, 1 part row t, rounded.
285 d[0] = (a0 * 3 + b0 + 2) >> 2;
286 d[1] = (a1 * 3 + b1 + 2) >> 2;
287 d[2] = (a2 * 3 + b2 + 2) >> 2;
294 // Filter rows 1 and 2 together, 1 : 1
// 8-bit samples.  Four source pixels in each of two rows (s, and t one
// src_stride below) are first reduced horizontally to three values with
// weights 3:1, 1:1 and 1:3; the two rows are then averaged with equal weight.
// dst_width must be a positive multiple of 3.
// NOTE(review): the pointer advances at the end of the loop are elided in
// this excerpt.
295 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
296 uint8* d, int dst_width) {
297 const uint8* s = src_ptr;
298 const uint8* t = src_ptr + src_stride;
300 assert((dst_width % 3 == 0) && (dst_width > 0));
301 for (x = 0; x < dst_width; x += 3) {
// Horizontal pass on row s (rounded).
302 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
303 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
304 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
// Horizontal pass on row t (rounded).
305 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
306 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
307 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
// Vertical pass: rounded average of the two rows.
308 d[0] = (a0 + b0 + 1) >> 1;
309 d[1] = (a1 + b1 + 1) >> 1;
310 d[2] = (a2 + b2 + 1) >> 1;
// 16-bit samples.  Four source pixels in each of two rows (s, and t one
// src_stride below) are first reduced horizontally to three values with
// weights 3:1, 1:1 and 1:3; the two rows are then averaged with equal weight.
// dst_width must be a positive multiple of 3.
// NOTE(review): the pointer advances at the end of the loop are elided in
// this excerpt.
317 void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
318 uint16* d, int dst_width) {
319 const uint16* s = src_ptr;
320 const uint16* t = src_ptr + src_stride;
322 assert((dst_width % 3 == 0) && (dst_width > 0));
323 for (x = 0; x < dst_width; x += 3) {
// Horizontal pass on row s (rounded).
324 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
325 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
326 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
// Horizontal pass on row t (rounded).
327 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
328 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
329 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
// Vertical pass: rounded average of the two rows.
330 d[0] = (a0 + b0 + 1) >> 1;
331 d[1] = (a1 + b1 + 1) >> 1;
332 d[2] = (a2 + b2 + 1) >> 1;
339 // Scales a single row of pixels using point sampling.
// x is a 16.16 fixed-point source position: the integer part (x >> 16)
// selects the 8-bit source pixel that is copied to the destination.
// NOTE(review): the statements that advance x (presumably by dx) and
// dst_ptr, and the guard around the final statement, are elided in this
// excerpt.
340 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
341 int dst_width, int x, int dx) {
// Two output pixels per iteration.
343 for (j = 0; j < dst_width - 1; j += 2) {
344 dst_ptr[0] = src_ptr[x >> 16];
346 dst_ptr[1] = src_ptr[x >> 16];
// One more sample; presumably the odd-width tail (guard not shown).
351 dst_ptr[0] = src_ptr[x >> 16];
// Point-samples one row of 16-bit pixels.  x is a 16.16 fixed-point source
// position: the integer part (x >> 16) selects the source pixel that is
// copied to the destination.
// NOTE(review): the statements that advance x (presumably by dx) and
// dst_ptr, and the guard around the final statement, are elided in this
// excerpt.
355 void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
356 int dst_width, int x, int dx) {
// Two output pixels per iteration.
358 for (j = 0; j < dst_width - 1; j += 2) {
359 dst_ptr[0] = src_ptr[x >> 16];
361 dst_ptr[1] = src_ptr[x >> 16];
// One more sample; presumably the odd-width tail (guard not shown).
366 dst_ptr[0] = src_ptr[x >> 16];
370 // Scales a single row of pixels up by 2x using point sampling.
// Each 8-bit source pixel is written to two consecutive destination pixels.
// x and dx are not referenced in the visible lines.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
371 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
372 int dst_width, int x, int dx) {
374 for (j = 0; j < dst_width - 1; j += 2) {
// Duplicate one source pixel into a destination pair.
375 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
// Single copy; presumably the odd-width tail (guard not shown).
380 dst_ptr[0] = src_ptr[0];
// Scales one row of 16-bit pixels up by 2x: each source pixel is written to
// two consecutive destination pixels.  x and dx are not referenced in the
// visible lines.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
384 void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
385 int dst_width, int x, int dx) {
387 for (j = 0; j < dst_width - 1; j += 2) {
// Duplicate one source pixel into a destination pair.
388 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
// Single copy; presumably the odd-width tail (guard not shown).
393 dst_ptr[0] = src_ptr[0];
// Blend a toward b by the 16-bit fraction f (callers pass x & 0xffff).
// (1-f)a + fb is evaluated as a + f(b-a); the scaled difference is shifted
// down by 16 and the result is truncated to uint8.
#define BLENDER(a, b, f) \
    (uint8)((int)(a) + (((int)(f) * ((int)(b) - (int)(a))) >> 16))
// Scales one row of 8-bit pixels with horizontal linear filtering.  Each
// output is BLENDER(a, b, x & 0xffff): b is the source pixel at xi + 1 and
// the low 16 bits of the fixed-point position x are the blend fraction.
// NOTE(review): the definitions of xi and a (presumably x >> 16 and
// src_ptr[xi]), the advance of x by dx and the tail guard are elided in this
// excerpt.  Reading src_ptr[xi + 1] requires one valid pixel past xi.
401 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
402 int dst_width, int x, int dx) {
// Two output pixels per iteration.
404 for (j = 0; j < dst_width - 1; j += 2) {
407 int b = src_ptr[xi + 1];
408 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
413 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
// One more filtered sample; presumably the odd-width tail (guard not shown).
420 int b = src_ptr[xi + 1];
421 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
// Variant of the 8-bit filtered column scaler that tracks the source
// position in a 64-bit integer: the 32-bit start position x32 is widened to
// int64 before stepping.  Each output is BLENDER(a, b, x & 0xffff) with b
// taken from src_ptr[xi + 1].
// NOTE(review): the definitions of xi and a, the advance of x by dx and the
// tail guard are elided in this excerpt.
425 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
426 int dst_width, int x32, int dx) {
427 int64 x = (int64)(x32);
// Two output pixels per iteration.
429 for (j = 0; j < dst_width - 1; j += 2) {
432 int b = src_ptr[xi + 1];
433 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
438 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
// One more filtered sample; presumably the odd-width tail (guard not shown).
445 int b = src_ptr[xi + 1];
446 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
// 16-bit blend of a toward b by the 16-bit fraction f: a + f(b-a) >> 16,
// truncated to uint16.  With 16-bit samples both f and (b - a) can reach
// 0xffff in magnitude, so their product does not fit in a 32-bit int; it is
// formed in int64 to avoid signed overflow.
#define BLENDER(a, b, f) (uint16)((int)(a) + \
    (int)(((int64)(f) * ((int)(b) - (int)(a))) >> 16))
// Scales one row of 16-bit pixels with horizontal linear filtering.  Each
// output is BLENDER(a, b, x & 0xffff): b is the source pixel at xi + 1 and
// the low 16 bits of the fixed-point position x are the blend fraction.
// NOTE(review): the definitions of xi and a, the advance of x by dx and the
// tail guard are elided in this excerpt.
454 void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
455 int dst_width, int x, int dx) {
// Two output pixels per iteration.
457 for (j = 0; j < dst_width - 1; j += 2) {
460 int b = src_ptr[xi + 1];
461 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
466 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
// One more filtered sample; presumably the odd-width tail (guard not shown).
473 int b = src_ptr[xi + 1];
474 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
// Variant of the 16-bit filtered column scaler that tracks the source
// position in a 64-bit integer: the 32-bit start position x32 is widened to
// int64 before stepping.  Each output is BLENDER(a, b, x & 0xffff) with b
// taken from src_ptr[xi + 1].
// NOTE(review): the definitions of xi and a, the advance of x by dx and the
// tail guard are elided in this excerpt.
478 void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
479 int dst_width, int x32, int dx) {
480 int64 x = (int64)(x32);
// Two output pixels per iteration.
482 for (j = 0; j < dst_width - 1; j += 2) {
485 int b = src_ptr[xi + 1];
486 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
491 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
// One more filtered sample; presumably the odd-width tail (guard not shown).
498 int b = src_ptr[xi + 1];
499 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
// Row downscaler for 8-bit samples (the name indicates a 3/8 reduction).
// dst_width must be a multiple of 3; the loop produces three destination
// pixels per iteration.
// NOTE(review): the loop body is elided in this excerpt, so which source
// pixels are sampled cannot be confirmed from here.
504 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
505 uint8* dst, int dst_width) {
507 assert(dst_width % 3 == 0);
508 for (x = 0; x < dst_width; x += 3) {
// Row downscaler for 16-bit samples (the name indicates a 3/8 reduction).
// dst_width must be a multiple of 3; the loop produces three destination
// pixels per iteration.
// NOTE(review): the loop body is elided in this excerpt.
517 void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
518 uint16* dst, int dst_width) {
520 assert(dst_width % 3 == 0);
521 for (x = 0; x < dst_width; x += 3) {
// Box downscale over three source rows (offsets 0, stride, stride * 2) for
// 8-bit samples.  Per iteration, eight source columns yield three outputs:
// two from 3x3 blocks (columns 0..2 and 3..5) and one from a 2-column by
// 3-row block (columns 6..7).  dst_width must be a positive multiple of 3.
// NOTE(review): each sum is multiplied by a factor on a continuation line
// that is elided in this excerpt (presumably a fixed-point reciprocal of the
// sample count); the pointer advances are elided too.
531 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
532 ptrdiff_t src_stride,
533 uint8* dst_ptr, int dst_width) {
534 intptr_t stride = src_stride;
536 assert((dst_width % 3 == 0) && (dst_width > 0));
537 for (i = 0; i < dst_width; i += 3) {
// 9 samples: columns 0..2.
538 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
539 src_ptr[stride + 0] + src_ptr[stride + 1] +
540 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
541 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
// 9 samples: columns 3..5.
543 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
544 src_ptr[stride + 3] + src_ptr[stride + 4] +
545 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
546 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
// 6 samples: columns 6..7.
548 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
549 src_ptr[stride + 6] + src_ptr[stride + 7] +
550 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
// Box downscale over three source rows (offsets 0, stride, stride * 2) for
// 16-bit samples.  Per iteration, eight source columns yield three outputs:
// two from 3x3 blocks (columns 0..2 and 3..5) and one from a 2-column by
// 3-row block (columns 6..7).  dst_width must be a positive multiple of 3.
// NOTE(review): each sum is multiplied by a factor on a continuation line
// that is elided in this excerpt (presumably a fixed-point reciprocal of the
// sample count); the pointer advances are elided too.
557 void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
558 ptrdiff_t src_stride,
559 uint16* dst_ptr, int dst_width) {
560 intptr_t stride = src_stride;
562 assert((dst_width % 3 == 0) && (dst_width > 0));
563 for (i = 0; i < dst_width; i += 3) {
// 9 samples: columns 0..2.
564 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
565 src_ptr[stride + 0] + src_ptr[stride + 1] +
566 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
567 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
// 9 samples: columns 3..5.
569 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
570 src_ptr[stride + 3] + src_ptr[stride + 4] +
571 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
572 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
// 6 samples: columns 6..7.
574 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
575 src_ptr[stride + 6] + src_ptr[stride + 7] +
576 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
// Box downscale over two source rows (offsets 0 and stride) for 8-bit
// samples.  Per iteration, eight source columns yield three outputs: two from
// 3-column by 2-row blocks (columns 0..2 and 3..5), divided by 6 through the
// 16.16 reciprocal (65536 / 6), and one from a 2x2 block (columns 6..7).
// dst_width must be a positive multiple of 3.
// NOTE(review): the factor applied to the last sum and the pointer advances
// are elided in this excerpt.
584 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
585 uint8* dst_ptr, int dst_width) {
586 intptr_t stride = src_stride;
588 assert((dst_width % 3 == 0) && (dst_width > 0));
589 for (i = 0; i < dst_width; i += 3) {
// 6 samples, multiplied by 65536 / 6 and shifted down 16: truncating mean.
590 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
591 src_ptr[stride + 0] + src_ptr[stride + 1] +
592 src_ptr[stride + 2]) * (65536 / 6) >> 16;
593 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
594 src_ptr[stride + 3] + src_ptr[stride + 4] +
595 src_ptr[stride + 5]) * (65536 / 6) >> 16;
// 4 samples: columns 6..7 (scale factor on an elided line).
596 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
597 src_ptr[stride + 6] + src_ptr[stride + 7]) *
// Box downscale over two source rows (offsets 0 and stride) for 16-bit
// samples.  Per iteration, eight source columns yield three outputs: two from
// 3-column by 2-row blocks (columns 0..2 and 3..5), divided by 6 through the
// 16.16 reciprocal (65536 / 6), and one from a 2x2 block (columns 6..7).
// dst_width must be a positive multiple of 3.
// NOTE(review): the factor applied to the last sum and the pointer advances
// are elided in this excerpt.
// NOTE(review): with full-range 16-bit samples, sum * (65536 / 6) can exceed
// a 32-bit int - confirm the expected sample range.
604 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
605 uint16* dst_ptr, int dst_width) {
606 intptr_t stride = src_stride;
608 assert((dst_width % 3 == 0) && (dst_width > 0));
609 for (i = 0; i < dst_width; i += 3) {
// 6 samples, multiplied by 65536 / 6 and shifted down 16: truncating mean.
610 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
611 src_ptr[stride + 0] + src_ptr[stride + 1] +
612 src_ptr[stride + 2]) * (65536 / 6) >> 16;
613 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
614 src_ptr[stride + 3] + src_ptr[stride + 4] +
615 src_ptr[stride + 5]) * (65536 / 6) >> 16;
// 4 samples: columns 6..7 (scale factor on an elided line).
616 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
617 src_ptr[stride + 6] + src_ptr[stride + 7]) *
// Accumulates one row of 8-bit source samples into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i].  src_width must be positive.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
624 void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
626 assert(src_width > 0);
// Two samples per iteration.
627 for (x = 0; x < src_width - 1; x += 2) {
628 dst_ptr[0] += src_ptr[0];
629 dst_ptr[1] += src_ptr[1];
// One more sample; presumably the odd-width tail (guard not shown).
634 dst_ptr[0] += src_ptr[0];
// Accumulates one row of 16-bit source samples into a row of 32-bit sums:
// dst_ptr[i] += src_ptr[i].  src_width must be positive.
// NOTE(review): pointer advances and the guard around the final statement are
// elided in this excerpt.
638 void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
640 assert(src_width > 0);
// Two samples per iteration.
641 for (x = 0; x < src_width - 1; x += 2) {
642 dst_ptr[0] += src_ptr[0];
643 dst_ptr[1] += src_ptr[1];
// One more sample; presumably the odd-width tail (guard not shown).
648 dst_ptr[0] += src_ptr[0];
// Row downscaler for 4-byte pixels (the name indicates a 2x reduction of
// ARGB data).  Source and destination byte pointers are reinterpreted as
// uint32 so whole pixels can be moved at once; the loop produces two
// destination pixels per iteration.
// NOTE(review): the uint32 casts assume suitably aligned buffers - confirm
// with callers.  The loop body and the tail are elided in this excerpt.
652 void ScaleARGBRowDown2_C(const uint8* src_argb,
653 ptrdiff_t src_stride,
654 uint8* dst_argb, int dst_width) {
655 const uint32* src = (const uint32*)(src_argb);
656 uint32* dst = (uint32*)(dst_argb);
659 for (x = 0; x < dst_width - 1; x += 2) {
// Horizontal 2:1 linear downscale of 4-byte pixels: each of the four
// destination channel bytes is the rounded average of the same channel in two
// adjacent source pixels (byte offsets k and k + 4).  src_stride is not
// referenced in the visible lines.
// NOTE(review): the pointer advances at the end of the loop are elided in
// this excerpt.
670 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
671 ptrdiff_t src_stride,
672 uint8* dst_argb, int dst_width) {
// One destination pixel per iteration.
674 for (x = 0; x < dst_width; ++x) {
675 dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
676 dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
677 dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
678 dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
// 2x2 box downscale of 4-byte pixels: each destination channel byte is the
// rounded average of that channel over two adjacent pixels in the current row
// and in the row src_stride bytes below it.
// NOTE(review): the pointer advances at the end of the loop are elided in
// this excerpt.
684 void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
685 uint8* dst_argb, int dst_width) {
// One destination pixel per iteration; +2 rounds before dividing by 4.
687 for (x = 0; x < dst_width; ++x) {
688 dst_argb[0] = (src_argb[0] + src_argb[4] +
689 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
690 dst_argb[1] = (src_argb[1] + src_argb[5] +
691 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
692 dst_argb[2] = (src_argb[2] + src_argb[6] +
693 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
694 dst_argb[3] = (src_argb[3] + src_argb[7] +
695 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
// Point-samples 4-byte pixels at a fixed stride of src_stepx pixels: the
// second output of each pair is read src_stepx pixels after the first, and
// the source then advances by two steps.
// NOTE(review): one parameter line (presumably the declaration of src_stepx)
// and parts of the loop body and tail are elided in this excerpt.  The uint32
// casts assume suitably aligned buffers - confirm with callers.
701 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
703 uint8* dst_argb, int dst_width) {
704 const uint32* src = (const uint32*)(src_argb);
705 uint32* dst = (uint32*)(dst_argb);
// Two output pixels per iteration.
708 for (x = 0; x < dst_width - 1; x += 2) {
710 dst[1] = src[src_stepx];
711 src += src_stepx * 2;
// Box-filtered variant of the even-step ARGB downscaler: each destination
// channel byte is the rounded average of a 2x2 pixel block (current row and
// the row src_stride bytes below), after which the source advances by
// src_stepx pixels (src_stepx * 4 bytes).
// NOTE(review): one parameter line (presumably the declaration of src_stepx)
// and the destination pointer advance are elided in this excerpt.
719 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
720 ptrdiff_t src_stride,
722 uint8* dst_argb, int dst_width) {
// One destination pixel per iteration; +2 rounds before dividing by 4.
724 for (x = 0; x < dst_width; ++x) {
725 dst_argb[0] = (src_argb[0] + src_argb[4] +
726 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
727 dst_argb[1] = (src_argb[1] + src_argb[5] +
728 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
729 dst_argb[2] = (src_argb[2] + src_argb[6] +
730 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
731 dst_argb[3] = (src_argb[3] + src_argb[7] +
732 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
// Step to the next sampled block: 4 bytes per pixel.
733 src_argb += src_stepx * 4;
738 // Scales a single row of pixels using point sampling.
// Pixels are moved as whole uint32 values.  x is a 16.16 fixed-point source
// position whose integer part (x >> 16) selects the source pixel.
// NOTE(review): the statements that advance x (presumably by dx) and dst,
// and the guard around the final statement, are elided in this excerpt.  The
// uint32 casts assume suitably aligned buffers - confirm with callers.
739 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
740 int dst_width, int x, int dx) {
741 const uint32* src = (const uint32*)(src_argb);
742 uint32* dst = (uint32*)(dst_argb);
// Two output pixels per iteration.
744 for (j = 0; j < dst_width - 1; j += 2) {
745 dst[0] = src[x >> 16];
747 dst[1] = src[x >> 16];
// One more sample; presumably the odd-width tail (guard not shown).
752 dst[0] = src[x >> 16];
// Point-samples one row of 4-byte pixels with the source position tracked in
// a 64-bit integer: the 32-bit start position x32 is widened to int64, and
// the integer part (x >> 16) selects the source pixel.
// NOTE(review): the statements that advance x (presumably by dx) and dst,
// and the guard around the final statement, are elided in this excerpt.
756 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
757 int dst_width, int x32, int dx) {
758 int64 x = (int64)(x32);
759 const uint32* src = (const uint32*)(src_argb);
760 uint32* dst = (uint32*)(dst_argb);
// Two output pixels per iteration.
762 for (j = 0; j < dst_width - 1; j += 2) {
763 dst[0] = src[x >> 16];
765 dst[1] = src[x >> 16];
// One more sample; presumably the odd-width tail (guard not shown).
770 dst[0] = src[x >> 16];
774 // Scales a single row of pixels up by 2x using point sampling.
// Each 4-byte source pixel is written to two consecutive destination pixels.
// x and dx are not referenced in the visible lines.
// NOTE(review): pointer advances and the odd-width tail are elided in this
// excerpt.
775 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
776 int dst_width, int x, int dx) {
777 const uint32* src = (const uint32*)(src_argb);
778 uint32* dst = (uint32*)(dst_argb);
780 for (j = 0; j < dst_width - 1; j += 2) {
// Duplicate one source pixel into a destination pair.
781 dst[1] = dst[0] = src[0];
// Mimics SSSE3 blender.
// f is a 7-bit fraction (callers pass (x >> 9) & 0x7f).  BLENDER1 weights a
// by (0x7f ^ f), which equals 127 - f for that range, and b by f, then
// divides the sum by 128.  BLENDERC applies BLENDER1 to the 8-bit channel at
// bit offset s, and BLENDER combines the four channels of a 32-bit pixel.
// Every argument and every expansion is parenthesized so the macros keep
// their meaning when given expression arguments or used inside a larger
// expression.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) ((uint32)( \
    BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s)))
#define BLENDER(a, b, f) \
    (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
// Scales one row of 4-byte pixels with horizontal linear filtering.  The
// blend fraction xf is the top 7 bits of the 16-bit fractional part of the
// fixed-point position x ((x >> 9) & 0x7f); each output is BLENDER(a, b, xf)
// with b taken from the pixel after xi.
// NOTE(review): the definitions of xi and a (presumably x >> 16 and
// src[xi]), the advance of x by dx and the tail guard are elided in this
// excerpt.  Reading src[xi + 1] requires one valid pixel past xi.
798 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
799 int dst_width, int x, int dx) {
800 const uint32* src = (const uint32*)(src_argb);
801 uint32* dst = (uint32*)(dst_argb);
// Two output pixels per iteration.
803 for (j = 0; j < dst_width - 1; j += 2) {
805 int xf = (x >> 9) & 0x7f;
807 uint32 b = src[xi + 1];
808 dst[0] = BLENDER(a, b, xf);
811 xf = (x >> 9) & 0x7f;
814 dst[1] = BLENDER(a, b, xf);
// One more filtered sample; presumably the odd-width tail (guard not shown).
820 int xf = (x >> 9) & 0x7f;
822 uint32 b = src[xi + 1];
823 dst[0] = BLENDER(a, b, xf);
// Variant of the filtered ARGB column scaler that tracks the source position
// in a 64-bit integer: the 32-bit start position x32 is widened to int64.
// The blend fraction xf is (x >> 9) & 0x7f and each output is
// BLENDER(a, b, xf) with b taken from the pixel after xi.
// NOTE(review): the definitions of xi and a, the advance of x by dx and the
// tail guard are elided in this excerpt.
827 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
828 int dst_width, int x32, int dx) {
829 int64 x = (int64)(x32);
830 const uint32* src = (const uint32*)(src_argb);
831 uint32* dst = (uint32*)(dst_argb);
// Two output pixels per iteration.
833 for (j = 0; j < dst_width - 1; j += 2) {
835 int xf = (x >> 9) & 0x7f;
837 uint32 b = src[xi + 1];
838 dst[0] = BLENDER(a, b, xf);
841 xf = (x >> 9) & 0x7f;
844 dst[1] = BLENDER(a, b, xf);
// One more filtered sample; presumably the odd-width tail (guard not shown).
850 int xf = (x >> 9) & 0x7f;
852 uint32 b = src[xi + 1];
853 dst[0] = BLENDER(a, b, xf);
860 // Scale plane vertically with bilinear interpolation.
// Works on rows of dst_width * bpp bytes (bpp is asserted to be 1..4).  For
// each destination row, InterpolateRow blends the source row at index yi with
// its neighbour using the 8-bit fraction yf = (y >> 8) & 255, or 0 when
// filtering is zero.  The row function starts from a default and is replaced
// by a SIMD variant when the matching CPU flag is set; the exact-width
// variant is used only when the row byte count meets the alignment tested.
// NOTE(review): the default row function, the computation of yi (presumably
// y >> 16 after clamping y to max_y), the advance of y by dy and all closing
// braces / #endif lines are elided in this excerpt.
861 void ScalePlaneVertical(int src_height,
862 int dst_width, int dst_height,
863 int src_stride, int dst_stride,
864 const uint8* src_argb, uint8* dst_argb,
865 int x, int y, int dy,
866 int bpp, enum FilterMode filtering) {
867 // TODO(fbarchard): Allow higher bpp.
868 int dst_width_bytes = dst_width * bpp;
869 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
870 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
// Largest 16.16 y position just below the last source row (0 for one row).
872 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
874 assert(bpp >= 1 && bpp <= 4);
875 assert(src_height != 0);
876 assert(dst_width > 0);
877 assert(dst_height > 0);
// Apply the integer part of the 16.16 horizontal start offset.
878 src_argb += (x >> 16) * bpp;
879 #if defined(HAS_INTERPOLATEROW_SSE2)
880 if (TestCpuFlag(kCpuHasSSE2)) {
881 InterpolateRow = InterpolateRow_Any_SSE2;
882 if (IS_ALIGNED(dst_width_bytes, 16)) {
883 InterpolateRow = InterpolateRow_SSE2;
887 #if defined(HAS_INTERPOLATEROW_SSSE3)
888 if (TestCpuFlag(kCpuHasSSSE3)) {
889 InterpolateRow = InterpolateRow_Any_SSSE3;
890 if (IS_ALIGNED(dst_width_bytes, 16)) {
891 InterpolateRow = InterpolateRow_SSSE3;
895 #if defined(HAS_INTERPOLATEROW_AVX2)
896 if (TestCpuFlag(kCpuHasAVX2)) {
897 InterpolateRow = InterpolateRow_Any_AVX2;
898 if (IS_ALIGNED(dst_width_bytes, 32)) {
899 InterpolateRow = InterpolateRow_AVX2;
903 #if defined(HAS_INTERPOLATEROW_NEON)
904 if (TestCpuFlag(kCpuHasNEON)) {
905 InterpolateRow = InterpolateRow_Any_NEON;
906 if (IS_ALIGNED(dst_width_bytes, 16)) {
907 InterpolateRow = InterpolateRow_NEON;
911 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
// The MIPS path additionally requires 4-byte aligned pointers and strides.
912 if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
913 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
914 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
915 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
916 if (IS_ALIGNED(dst_width_bytes, 4)) {
917 InterpolateRow = InterpolateRow_MIPS_DSPR2;
// One destination row per iteration.
921 for (j = 0; j < dst_height; ++j) {
928 yf = filtering ? ((y >> 8) & 255) : 0;
929 InterpolateRow(dst_argb, src_argb + yi * src_stride,
930 src_stride, dst_width_bytes, yf);
931 dst_argb += dst_stride;
935 void ScalePlaneVertical_16(int src_height,
936 int dst_width, int dst_height,
937 int src_stride, int dst_stride,
938 const uint16* src_argb, uint16* dst_argb,
939 int x, int y, int dy,
940 int wpp, enum FilterMode filtering) {
941 // TODO(fbarchard): Allow higher wpp.
942 int dst_width_words = dst_width * wpp;
943 void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
944 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
946 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
948 assert(wpp >= 1 && wpp <= 2);
949 assert(src_height != 0);
950 assert(dst_width > 0);
951 assert(dst_height > 0);
952 src_argb += (x >> 16) * wpp;
953 #if defined(HAS_INTERPOLATEROW_16_SSE2)
954 if (TestCpuFlag(kCpuHasSSE2)) {
955 InterpolateRow = InterpolateRow_Any_16_SSE2;
956 if (IS_ALIGNED(dst_width_bytes, 16)) {
957 InterpolateRow = InterpolateRow_16_SSE2;
961 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
962 if (TestCpuFlag(kCpuHasSSSE3)) {
963 InterpolateRow = InterpolateRow_Any_16_SSSE3;
964 if (IS_ALIGNED(dst_width_bytes, 16)) {
965 InterpolateRow = InterpolateRow_16_SSSE3;
969 #if defined(HAS_INTERPOLATEROW_16_AVX2)
970 if (TestCpuFlag(kCpuHasAVX2)) {
971 InterpolateRow = InterpolateRow_Any_16_AVX2;
972 if (IS_ALIGNED(dst_width_bytes, 32)) {
973 InterpolateRow = InterpolateRow_16_AVX2;
977 #if defined(HAS_INTERPOLATEROW_16_NEON)
978 if (TestCpuFlag(kCpuHasNEON)) {
979 InterpolateRow = InterpolateRow_Any_16_NEON;
980 if (IS_ALIGNED(dst_width_bytes, 16)) {
981 InterpolateRow = InterpolateRow_16_NEON;
985 #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
986 if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
987 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
988 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
989 InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
990 if (IS_ALIGNED(dst_width_bytes, 4)) {
991 InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
995 for (j = 0; j < dst_height; ++j) {
1002 yf = filtering ? ((y >> 8) & 255) : 0;
1003 InterpolateRow(dst_argb, src_argb + yi * src_stride,
1004 src_stride, dst_width_words, yf);
1005 dst_argb += dst_stride;
1010 // Simplify the filtering based on scale factors.
// Negative source dimensions are replaced by their magnitude first.  The
// checks then run from the most expensive mode to the cheapest, and a mode
// downgraded by one check is re-examined by the checks that follow it:
// Box -> Bilinear -> Linear -> None.
// NOTE(review): closing braces and the final return statement are elided in
// this excerpt; presumably the (possibly downgraded) filtering is returned.
1011 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
1012 int dst_width, int dst_height,
1013 enum FilterMode filtering) {
1014 if (src_width < 0) {
1015 src_width = -src_width;
1017 if (src_height < 0) {
1018 src_height = -src_height;
1020 if (filtering == kFilterBox) {
1021 // If scaling both axes to 0.5 or larger, switch from Box to Bilinear.
1022 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1023 filtering = kFilterBilinear;
1026 if (filtering == kFilterBilinear) {
// A single source row leaves nothing to blend vertically.
1027 if (src_height == 1) {
1028 filtering = kFilterLinear;
1030 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1031 if (dst_height == src_height || dst_height * 3 == src_height) {
1032 filtering = kFilterLinear;
1034 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1035 // avoid reading 2 pixels horizontally that causes memory exception.
1036 if (src_width == 1) {
1037 filtering = kFilterNone;
1040 if (filtering == kFilterLinear) {
// A single source column leaves nothing to blend horizontally.
1041 if (src_width == 1) {
1042 filtering = kFilterNone;
1044 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1045 if (dst_width == src_width || dst_width * 3 == src_width) {
1046 filtering = kFilterNone;
1052 // Divide num by div and return as 16.16 fixed point result.
1053 int FixedDiv_C(int num, int div) {
1054 return (int)(((int64)(num) << 16) / div);
1057 // Divide with an adjusted numerator and return a 16.16 fixed point result.
// The numerator is (num << 16) - 0x00010001, widened to int64 before the
// shift.
// NOTE(review): the divisor expression continues on a line that is elided in
// this excerpt (presumably div - 1, since the visible callers only use this
// when the destination size is greater than 1) - confirm against the full
// file.
1058 int FixedDiv1_C(int num, int div) {
1059 return (int)((((int64)(num) << 16) - 0x00010001) /
// Start position for a step dx with bias s: half the magnitude of dx plus s,
// negated as a whole when dx is negative so both directions mirror each
// other.  Arguments and the expansion are fully parenthesized so expression
// arguments and surrounding operators cannot regroup the arithmetic.
#define CENTERSTART(dx, s) \
    (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
1065 // Compute slope values for stepping.
// Writes the 16.16 fixed-point steps *dx and *dy and start positions *x and
// *y for scaling src_width x src_height to dst_width x dst_height.  The
// formulas depend on the filter mode: Box, Bilinear, Linear, or the
// point-sampling fallback.  When downscaling with a linear filter the start
// is centered with a -0.5 pixel bias; the upsampling branches use FixedDiv1.
// A negative src_width requests horizontal mirroring.
// NOTE(review): several assignments to *x and *y, the else branches of the
// upsample cases, the final else keyword, the statement after the mirror
// offset and all closing braces are elided in this excerpt.
1066 void ScaleSlope(int src_width, int src_height,
1067 int dst_width, int dst_height,
1068 enum FilterMode filtering,
1069 int* x, int* y, int* dx, int* dy) {
1074 assert(src_width != 0);
1075 assert(src_height != 0);
1076 assert(dst_width > 0);
1077 assert(dst_height > 0);
1078 // Check for 1 pixel and avoid FixedDiv overflow.
// Treating the size as unchanged makes the quotient below exactly 1.0.
1079 if (dst_width == 1 && src_width >= 32768) {
1080 dst_width = src_width;
1082 if (dst_height == 1 && src_height >= 32768) {
1083 dst_height = src_height;
1085 if (filtering == kFilterBox) {
1086 // Scale step for point sampling duplicates all pixels equally.
1087 *dx = FixedDiv(Abs(src_width), dst_width);
1088 *dy = FixedDiv(src_height, dst_height);
1091 } else if (filtering == kFilterBilinear) {
1092 // Scale step for bilinear sampling renders last pixel once for upsample.
1093 if (dst_width <= Abs(src_width)) {
1094 *dx = FixedDiv(Abs(src_width), dst_width);
1095 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1096 } else if (dst_width > 1) {
1097 *dx = FixedDiv1(Abs(src_width), dst_width);
1100 if (dst_height <= src_height) {
1101 *dy = FixedDiv(src_height, dst_height);
1102 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1103 } else if (dst_height > 1) {
1104 *dy = FixedDiv1(src_height, dst_height);
1107 } else if (filtering == kFilterLinear) {
1108 // Scale step for bilinear sampling renders last pixel once for upsample.
1109 if (dst_width <= Abs(src_width)) {
1110 *dx = FixedDiv(Abs(src_width), dst_width);
1111 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1112 } else if (dst_width > 1) {
1113 *dx = FixedDiv1(Abs(src_width), dst_width);
// Linear filtering does not blend rows, so the vertical step is a plain ratio.
1116 *dy = FixedDiv(src_height, dst_height);
1119 // Scale step for point sampling duplicates all pixels equally.
1120 *dx = FixedDiv(Abs(src_width), dst_width);
1121 *dy = FixedDiv(src_height, dst_height);
1122 *x = CENTERSTART(*dx, 0);
1123 *y = CENTERSTART(*dy, 0);
1125 // Negative src_width means horizontally mirror.
1126 if (src_width < 0) {
// Move the start to the position of the last destination pixel.
1127 *x += (dst_width - 1) * *dx;
1129 // src_width = -src_width; // Caller must do this.
1136 } // namespace libyuv