2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
14 #include "./vp9_rtcd.h"
15 #include "./vpx_config.h"
16 #include "./vpx_dsp_rtcd.h"
18 #include "vp9/common/vp9_blockd.h"
19 #include "vp9/common/vp9_idct.h"
20 #include "vpx_dsp/fwd_txfm.h"
21 #include "vpx_ports/mem.h"
// fdct4: 4-point 1-D forward transform; it is the "DCT" member of the
// FHT_4 table entries.
//   input  - four coefficients, read only.
//   output - receives the four transformed coefficients.
// Every product is passed through fdct_round_shift() (defined outside this
// section; presumably rounds and scales the fixed-point product back down -
// confirm) and then narrowed to tran_low_t.
// NOTE(review): the declaration of `step` and this function's closing brace
// are absent from this listing (the embedded numbering has gaps) - confirm
// against the complete file.
23 static void fdct4(const tran_low_t *input, tran_low_t *output) {
25 tran_high_t temp1, temp2;
// Butterfly: sums of the mirrored input pairs go to step[0..1], their
// differences to step[2..3].
27 step[0] = input[0] + input[3];
28 step[1] = input[1] + input[2];
29 step[2] = input[1] - input[2];
30 step[3] = input[0] - input[3];
// Even outputs: sum and difference of the two pair sums, weighted by
// cospi_16_64.
32 temp1 = (step[0] + step[1]) * cospi_16_64;
33 temp2 = (step[0] - step[1]) * cospi_16_64;
34 output[0] = (tran_low_t)fdct_round_shift(temp1);
35 output[2] = (tran_low_t)fdct_round_shift(temp2);
// Odd outputs: two-term combinations of the pair differences using the
// cospi_24_64 / cospi_8_64 weights.
36 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
37 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
38 output[1] = (tran_low_t)fdct_round_shift(temp1);
39 output[3] = (tran_low_t)fdct_round_shift(temp2);
// fdct8: 8-point 1-D forward transform; it is the "DCT" member of the
// FHT_8 table entries and is also called per row by vp9_fdct8x8_quant_c.
//   input  - eight coefficients, read only.
//   output - receives the eight transformed coefficients.
// Even-indexed outputs are produced first from x0..x3, odd-indexed outputs
// afterwards from a second set of x0..x3 values.
// NOTE(review): the assignments to x0..x3 (before both output groups) and
// the closing brace are absent from this listing (the embedded numbering has
// gaps) - confirm against the complete file.
42 static void fdct8(const tran_low_t *input, tran_low_t *output) {
43 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
44 tran_high_t t0, t1, t2, t3; // needs32
45 tran_high_t x0, x1, x2, x3; // canbe16
// Butterfly: s0..s3 hold sums of mirrored inputs, s4..s7 the differences
// (s4 pairs inputs 3/4 ... s7 pairs inputs 0/7).
48 s0 = input[0] + input[7];
49 s1 = input[1] + input[6];
50 s2 = input[2] + input[5];
51 s3 = input[3] + input[4];
52 s4 = input[3] - input[4];
53 s5 = input[2] - input[5];
54 s6 = input[1] - input[6];
55 s7 = input[0] - input[7];
// Even half: same arithmetic shape as fdct4's output stage, written to
// output[0], [2], [4], [6].
62 t0 = (x0 + x1) * cospi_16_64;
63 t1 = (x0 - x1) * cospi_16_64;
64 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
65 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
66 output[0] = (tran_low_t)fdct_round_shift(t0);
67 output[2] = (tran_low_t)fdct_round_shift(t2);
68 output[4] = (tran_low_t)fdct_round_shift(t1);
69 output[6] = (tran_low_t)fdct_round_shift(t3);
// Odd half: combine the two middle differences (s5, s6) with cospi_16_64.
// NOTE(review): t2/t3 are narrowed to tran_low_t here, whereas the matching
// lines inside fdct16 and vp9_fdct8x8_quant_c keep the un-narrowed result -
// confirm the narrowing is intentional.
72 t0 = (s6 - s5) * cospi_16_64;
73 t1 = (s6 + s5) * cospi_16_64;
74 t2 = (tran_low_t)fdct_round_shift(t0);
75 t3 = (tran_low_t)fdct_round_shift(t1);
// Final weighting of the odd half with the cospi_28/4 and cospi_12/20 pairs,
// written to output[1], [3], [5], [7].
84 t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
85 t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
86 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
87 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
88 output[1] = (tran_low_t)fdct_round_shift(t0);
89 output[3] = (tran_low_t)fdct_round_shift(t2);
90 output[5] = (tran_low_t)fdct_round_shift(t1);
91 output[7] = (tran_low_t)fdct_round_shift(t3);
// fdct16: 16-point 1-D forward transform; it is the "DCT" member of the
// FHT_16 table entries.
//   in  - sixteen coefficients, read only.
//   out - receives the sixteen transformed coefficients.
// The mirrored sums (input[]) feed an inlined copy of the 8-point transform
// that fills the even-indexed outputs; the mirrored differences (step1[])
// go through the step2/step3 stages and fill the odd-indexed outputs.
// NOTE(review): the braces of the nested scope, the assignments to x0..x3
// and the closing brace of the function are absent from this listing (the
// embedded numbering has gaps) - confirm against the complete file.
94 static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
95 tran_high_t step1[8]; // canbe16
96 tran_high_t step2[8]; // canbe16
97 tran_high_t step3[8]; // canbe16
98 tran_high_t input[8]; // canbe16
99 tran_high_t temp1, temp2; // needs32
// Sums of mirrored inputs: in[k] + in[15 - k].
102 input[0] = in[0] + in[15];
103 input[1] = in[1] + in[14];
104 input[2] = in[2] + in[13];
105 input[3] = in[3] + in[12];
106 input[4] = in[4] + in[11];
107 input[5] = in[5] + in[10];
108 input[6] = in[6] + in[ 9];
109 input[7] = in[7] + in[ 8];
// Differences of mirrored inputs, stored innermost pair first:
// step1[k] = in[7 - k] - in[8 + k].
111 step1[0] = in[7] - in[ 8];
112 step1[1] = in[6] - in[ 9];
113 step1[2] = in[5] - in[10];
114 step1[3] = in[4] - in[11];
115 step1[4] = in[3] - in[12];
116 step1[5] = in[2] - in[13];
117 step1[6] = in[1] - in[14];
118 step1[7] = in[0] - in[15];
// Inlined 8-point transform over input[]; its results land on every second
// output (out[0], [2], ... [14]).
120 // fdct8(step, step);
122 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
123 tran_high_t t0, t1, t2, t3; // needs32
124 tran_high_t x0, x1, x2, x3; // canbe16
127 s0 = input[0] + input[7];
128 s1 = input[1] + input[6];
129 s2 = input[2] + input[5];
130 s3 = input[3] + input[4];
131 s4 = input[3] - input[4];
132 s5 = input[2] - input[5];
133 s6 = input[1] - input[6];
134 s7 = input[0] - input[7];
136 // fdct4(step, step);
141 t0 = (x0 + x1) * cospi_16_64;
142 t1 = (x0 - x1) * cospi_16_64;
143 t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
144 t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
145 out[0] = (tran_low_t)fdct_round_shift(t0);
146 out[4] = (tran_low_t)fdct_round_shift(t2);
147 out[8] = (tran_low_t)fdct_round_shift(t1);
148 out[12] = (tran_low_t)fdct_round_shift(t3);
// Odd half of the inlined 8-point transform (s5/s6 combined with
// cospi_16_64); t2/t3 are kept un-narrowed here.
151 t0 = (s6 - s5) * cospi_16_64;
152 t1 = (s6 + s5) * cospi_16_64;
153 t2 = fdct_round_shift(t0);
154 t3 = fdct_round_shift(t1);
163 t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
164 t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
165 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
166 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
167 out[2] = (tran_low_t)fdct_round_shift(t0);
168 out[6] = (tran_low_t)fdct_round_shift(t2);
169 out[10] = (tran_low_t)fdct_round_shift(t1);
170 out[14] = (tran_low_t)fdct_round_shift(t3);
// Odd-output path, first stage: the four middle differences step1[2..5] are
// combined pairwise with cospi_16_64 into step2[2..5].
174 temp1 = (step1[5] - step1[2]) * cospi_16_64;
175 temp2 = (step1[4] - step1[3]) * cospi_16_64;
176 step2[2] = fdct_round_shift(temp1);
177 step2[3] = fdct_round_shift(temp2);
178 temp1 = (step1[4] + step1[3]) * cospi_16_64;
179 temp2 = (step1[5] + step1[2]) * cospi_16_64;
180 step2[4] = fdct_round_shift(temp1);
181 step2[5] = fdct_round_shift(temp2);
// Second stage: add/subtract the untouched outer differences
// (step1[0], [1], [6], [7]) with the step2 values just computed.
184 step3[0] = step1[0] + step2[3];
185 step3[1] = step1[1] + step2[2];
186 step3[2] = step1[1] - step2[2];
187 step3[3] = step1[0] - step2[3];
188 step3[4] = step1[7] - step2[4];
189 step3[5] = step1[6] - step2[5];
190 step3[6] = step1[6] + step2[5];
191 step3[7] = step1[7] + step2[4];
// Third stage: cospi_8_64 / cospi_24_64 combinations of step3[1], [2], [5],
// [6]; step2[1], [2], [5], [6] are overwritten with the results.
194 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
195 temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
196 step2[1] = fdct_round_shift(temp1);
197 step2[2] = fdct_round_shift(temp2);
198 temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
199 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
200 step2[5] = fdct_round_shift(temp1);
201 step2[6] = fdct_round_shift(temp2);
// Fourth stage: step1[] is reused as scratch for the last add/subtract
// layer.
204 step1[0] = step3[0] + step2[1];
205 step1[1] = step3[0] - step2[1];
206 step1[2] = step3[3] + step2[2];
207 step1[3] = step3[3] - step2[2];
208 step1[4] = step3[4] - step2[5];
209 step1[5] = step3[4] + step2[5];
210 step1[6] = step3[7] - step2[6];
211 step1[7] = step3[7] + step2[6];
// Final weighting: each odd-indexed output is a two-term combination of a
// mirrored step1 pair (0/7, 1/6, 2/5, 3/4).
214 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
215 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
216 out[1] = (tran_low_t)fdct_round_shift(temp1);
217 out[9] = (tran_low_t)fdct_round_shift(temp2);
219 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
220 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
221 out[5] = (tran_low_t)fdct_round_shift(temp1);
222 out[13] = (tran_low_t)fdct_round_shift(temp2);
224 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
225 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
226 out[3] = (tran_low_t)fdct_round_shift(temp1);
227 out[11] = (tran_low_t)fdct_round_shift(temp2);
229 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
230 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
231 out[7] = (tran_low_t)fdct_round_shift(temp1);
232 out[15] = (tran_low_t)fdct_round_shift(temp2);
// fadst4: 4-point 1-D forward transform; it is the "ADST" member of the
// FHT_4 table entries.
//   input  - four coefficients (presumably loaded into x0..x3 - confirm).
//   output - receives the four transformed coefficients.
// NOTE(review): most of this function is absent from this listing (the
// embedded numbering jumps 237 -> 244 and 245 -> 268): the loads into
// x0..x3, whatever follows the all-zero store, the computation of s0..s3 and
// the closing braces. Only the visible lines are documented here.
235 static void fadst4(const tran_low_t *input, tran_low_t *output) {
236 tran_high_t x0, x1, x2, x3;
237 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
// When all four x values are zero the output is set to all zeros.
244 if (!(x0 | x1 | x2 | x3)) {
245 output[0] = output[1] = output[2] = output[3] = 0;
// s0..s3 are reduced with fdct_round_shift() and narrowed to tran_low_t.
268 // 1-D transform scaling factor is sqrt(2).
269 output[0] = (tran_low_t)fdct_round_shift(s0);
270 output[1] = (tran_low_t)fdct_round_shift(s1);
271 output[2] = (tran_low_t)fdct_round_shift(s2);
272 output[3] = (tran_low_t)fdct_round_shift(s3);
// fadst8: 8-point 1-D forward transform; it is the "ADST" member of the
// FHT_8 table entries.
//   input  - eight coefficients, read only.
//   output - receives the eight transformed coefficients.
// Works in three stages of weighted pair combinations, each followed by
// fdct_round_shift(), and finishes with a permuted, partly negated store.
// NOTE(review): the stage-2 handling of s0..s3 / x0..x3 and the closing
// brace are absent from this listing (the embedded numbering has gaps) -
// confirm against the complete file.
275 static void fadst8(const tran_low_t *input, tran_low_t *output) {
276 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
// Inputs are loaded in permuted order: 7, 0, 5, 2, 3, 4, 1, 6.
278 tran_high_t x0 = input[7];
279 tran_high_t x1 = input[0];
280 tran_high_t x2 = input[5];
281 tran_high_t x3 = input[2];
282 tran_high_t x4 = input[3];
283 tran_high_t x5 = input[4];
284 tran_high_t x6 = input[1];
285 tran_high_t x7 = input[6];
// Stage 1: each adjacent pair (x0,x1) .. (x6,x7) is combined with its own
// pair of cospi weights.
288 s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
289 s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
290 s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
291 s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
292 s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
293 s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
294 s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
295 s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
// x0..x3 take s[k] + s[k+4], x4..x7 take s[k] - s[k+4].
297 x0 = fdct_round_shift(s0 + s4);
298 x1 = fdct_round_shift(s1 + s5);
299 x2 = fdct_round_shift(s2 + s6);
300 x3 = fdct_round_shift(s3 + s7);
301 x4 = fdct_round_shift(s0 - s4);
302 x5 = fdct_round_shift(s1 - s5);
303 x6 = fdct_round_shift(s2 - s6);
304 x7 = fdct_round_shift(s3 - s7);
// Stage 2 (visible part): x4..x7 are combined with cospi_8_64 / cospi_24_64.
311 s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
312 s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
313 s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
314 s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
320 x4 = fdct_round_shift(s4 + s6);
321 x5 = fdct_round_shift(s5 + s7);
322 x6 = fdct_round_shift(s4 - s6);
323 x7 = fdct_round_shift(s5 - s7);
// Stage 3: sums and differences of (x2,x3) and (x6,x7) weighted by
// cospi_16_64.
326 s2 = cospi_16_64 * (x2 + x3);
327 s3 = cospi_16_64 * (x2 - x3);
328 s6 = cospi_16_64 * (x6 + x7);
329 s7 = cospi_16_64 * (x6 - x7);
331 x2 = fdct_round_shift(s2);
332 x3 = fdct_round_shift(s3);
333 x6 = fdct_round_shift(s6);
334 x7 = fdct_round_shift(s7);
// Store in permuted order; every odd-indexed output is negated.
336 output[0] = (tran_low_t)x0;
337 output[1] = (tran_low_t)-x4;
338 output[2] = (tran_low_t)x6;
339 output[3] = (tran_low_t)-x2;
340 output[4] = (tran_low_t)x3;
341 output[5] = (tran_low_t)-x7;
342 output[6] = (tran_low_t)x5;
343 output[7] = (tran_low_t)-x1;
// fadst16: 16-point 1-D forward transform; it is the "ADST" member of the
// FHT_16 table entries.
//   input  - sixteen coefficients, read only.
//   output - receives the sixteen transformed coefficients.
// Four stages of weighted pair combinations, each followed by
// fdct_round_shift(), then a permuted store with some outputs negated.
// NOTE(review): in stages 2 and 3 only the upper-index terms are visible;
// the lines handling the remaining s/x values and the closing brace are
// absent from this listing (the embedded numbering has gaps) - confirm
// against the complete file.
346 static void fadst16(const tran_low_t *input, tran_low_t *output) {
347 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
348 tran_high_t s9, s10, s11, s12, s13, s14, s15;
// Inputs are loaded in permuted order: even x indices walk down from
// input[15] by twos, odd x indices walk up from input[0] by twos.
350 tran_high_t x0 = input[15];
351 tran_high_t x1 = input[0];
352 tran_high_t x2 = input[13];
353 tran_high_t x3 = input[2];
354 tran_high_t x4 = input[11];
355 tran_high_t x5 = input[4];
356 tran_high_t x6 = input[9];
357 tran_high_t x7 = input[6];
358 tran_high_t x8 = input[7];
359 tran_high_t x9 = input[8];
360 tran_high_t x10 = input[5];
361 tran_high_t x11 = input[10];
362 tran_high_t x12 = input[3];
363 tran_high_t x13 = input[12];
364 tran_high_t x14 = input[1];
365 tran_high_t x15 = input[14];
// Stage 1: each adjacent pair (x0,x1) .. (x14,x15) is combined with its own
// pair of odd-numbered cospi weights.
368 s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
369 s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
370 s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
371 s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
372 s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
373 s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
374 s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
375 s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
376 s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
377 s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
378 s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
379 s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
380 s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
381 s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
382 s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
383 s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
// x0..x7 take s[k] + s[k+8], x8..x15 take s[k] - s[k+8].
385 x0 = fdct_round_shift(s0 + s8);
386 x1 = fdct_round_shift(s1 + s9);
387 x2 = fdct_round_shift(s2 + s10);
388 x3 = fdct_round_shift(s3 + s11);
389 x4 = fdct_round_shift(s4 + s12);
390 x5 = fdct_round_shift(s5 + s13);
391 x6 = fdct_round_shift(s6 + s14);
392 x7 = fdct_round_shift(s7 + s15);
393 x8 = fdct_round_shift(s0 - s8);
394 x9 = fdct_round_shift(s1 - s9);
395 x10 = fdct_round_shift(s2 - s10);
396 x11 = fdct_round_shift(s3 - s11);
397 x12 = fdct_round_shift(s4 - s12);
398 x13 = fdct_round_shift(s5 - s13);
399 x14 = fdct_round_shift(s6 - s14);
400 x15 = fdct_round_shift(s7 - s15);
// Stage 2 (visible part): x8..x15 are combined with the cospi_4/28 and
// cospi_12/20 weight pairs.
411 s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
412 s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
413 s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
414 s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
415 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
416 s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
417 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
418 s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
428 x8 = fdct_round_shift(s8 + s12);
429 x9 = fdct_round_shift(s9 + s13);
430 x10 = fdct_round_shift(s10 + s14);
431 x11 = fdct_round_shift(s11 + s15);
432 x12 = fdct_round_shift(s8 - s12);
433 x13 = fdct_round_shift(s9 - s13);
434 x14 = fdct_round_shift(s10 - s14);
435 x15 = fdct_round_shift(s11 - s15);
// Stage 3 (visible part): x4..x7 and x12..x15 are combined with
// cospi_8_64 / cospi_24_64.
442 s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
443 s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
444 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
445 s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
450 s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
451 s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
452 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
453 s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
459 x4 = fdct_round_shift(s4 + s6);
460 x5 = fdct_round_shift(s5 + s7);
461 x6 = fdct_round_shift(s4 - s6);
462 x7 = fdct_round_shift(s5 - s7);
467 x12 = fdct_round_shift(s12 + s14);
468 x13 = fdct_round_shift(s13 + s15);
469 x14 = fdct_round_shift(s12 - s14);
470 x15 = fdct_round_shift(s13 - s15);
// Stage 4: sums and differences of the pairs (2,3), (6,7), (10,11), (14,15)
// weighted by +/- cospi_16_64; note the sign pattern differs per pair.
473 s2 = (- cospi_16_64) * (x2 + x3);
474 s3 = cospi_16_64 * (x2 - x3);
475 s6 = cospi_16_64 * (x6 + x7);
476 s7 = cospi_16_64 * (- x6 + x7);
477 s10 = cospi_16_64 * (x10 + x11);
478 s11 = cospi_16_64 * (- x10 + x11);
479 s14 = (- cospi_16_64) * (x14 + x15);
480 s15 = cospi_16_64 * (x14 - x15);
482 x2 = fdct_round_shift(s2);
483 x3 = fdct_round_shift(s3);
484 x6 = fdct_round_shift(s6);
485 x7 = fdct_round_shift(s7);
486 x10 = fdct_round_shift(s10);
487 x11 = fdct_round_shift(s11);
488 x14 = fdct_round_shift(s14);
489 x15 = fdct_round_shift(s15);
// Store in permuted order; outputs 1, 3, 13 and 15 are negated.
491 output[0] = (tran_low_t)x0;
492 output[1] = (tran_low_t)-x8;
493 output[2] = (tran_low_t)x12;
494 output[3] = (tran_low_t)-x4;
495 output[4] = (tran_low_t)x6;
496 output[5] = (tran_low_t)x14;
497 output[6] = (tran_low_t)x10;
498 output[7] = (tran_low_t)x2;
499 output[8] = (tran_low_t)x3;
500 output[9] = (tran_low_t)x11;
501 output[10] = (tran_low_t)x15;
502 output[11] = (tran_low_t)x7;
503 output[12] = (tran_low_t)x5;
504 output[13] = (tran_low_t)-x13;
505 output[14] = (tran_low_t)x9;
506 output[15] = (tran_low_t)-x1;
// FHT_4: pairs of 4-point 1-D transforms, indexed by tx_type (the value is
// given in each entry's trailing comment). vp9_fht4x4_c copies the selected
// entry and calls its `cols` and `rows` members.
// NOTE(review): transform_2d is declared outside this section; presumably
// the first initializer is `cols` and the second `rows` - confirm. The
// closing "};" is absent from this listing.
509 static const transform_2d FHT_4[] = {
510 { fdct4, fdct4 }, // DCT_DCT = 0
511 { fadst4, fdct4 }, // ADST_DCT = 1
512 { fdct4, fadst4 }, // DCT_ADST = 2
513 { fadst4, fadst4 } // ADST_ADST = 3
// FHT_8: pairs of 8-point 1-D transforms, indexed by tx_type (the value is
// given in each entry's trailing comment). vp9_fht8x8_c copies the selected
// entry and calls its `cols` and `rows` members.
// NOTE(review): transform_2d is declared outside this section; presumably
// the first initializer is `cols` and the second `rows` - confirm. The
// closing "};" is absent from this listing.
516 static const transform_2d FHT_8[] = {
517 { fdct8, fdct8 }, // DCT_DCT = 0
518 { fadst8, fdct8 }, // ADST_DCT = 1
519 { fdct8, fadst8 }, // DCT_ADST = 2
520 { fadst8, fadst8 } // ADST_ADST = 3
// FHT_16: pairs of 16-point 1-D transforms, indexed by tx_type (the value is
// given in each entry's trailing comment). vp9_fht16x16_c copies the
// selected entry and calls its `cols` and `rows` members.
// NOTE(review): transform_2d is declared outside this section; presumably
// the first initializer is `cols` and the second `rows` - confirm. The
// closing "};" is absent from this listing.
523 static const transform_2d FHT_16[] = {
524 { fdct16, fdct16 }, // DCT_DCT = 0
525 { fadst16, fdct16 }, // ADST_DCT = 1
526 { fdct16, fadst16 }, // DCT_ADST = 2
527 { fadst16, fadst16 } // ADST_ADST = 3
// vp9_fht4x4_c: 4x4 forward hybrid transform.
//   input   - source samples; element (row j, column i) is read from
//             input[j * stride + i].
//   output  - receives 16 coefficients, stored with a row pitch of 4.
//   stride  - distance in elements between consecutive input rows.
//   tx_type - DCT_DCT is delegated to vpx_fdct4x4_c(); any other value is
//             used as an index into FHT_4.
// NOTE(review): the else-branch braces, the declaration of the loop
// counters, the body of the `if (i == 0 && temp_in[0])` statement and the
// closing braces are absent from this listing (the embedded numbering has
// gaps) - confirm against the complete file.
530 void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
531 int stride, int tx_type) {
532 if (tx_type == DCT_DCT) {
533 vpx_fdct4x4_c(input, output, stride);
535 tran_low_t out[4 * 4];
537 tran_low_t temp_in[4], temp_out[4];
538 const transform_2d ht = FHT_4[tx_type];
// Column pass: gather column i (each sample multiplied by 16), transform it
// with ht.cols and store the result as column i of `out`.
541 for (i = 0; i < 4; ++i) {
542 for (j = 0; j < 4; ++j)
543 temp_in[j] = input[j * stride + i] * 16;
// Special case for a non-zero first sample of the first column; the
// statement it guards is not visible here.
544 if (i == 0 && temp_in[0])
546 ht.cols(temp_in, temp_out);
547 for (j = 0; j < 4; ++j)
548 out[j * 4 + i] = temp_out[j];
// Row pass: transform each row of `out` with ht.rows, then add 1 and shift
// right by 2 before storing.
552 for (i = 0; i < 4; ++i) {
553 for (j = 0; j < 4; ++j)
554 temp_in[j] = out[j + i * 4];
555 ht.rows(temp_in, temp_out);
556 for (j = 0; j < 4; ++j)
557 output[j + i * 4] = (temp_out[j] + 1) >> 2;
// vp9_fdct8x8_quant_c: 8x8 forward DCT followed by quantization.
//   input           - source samples, read column-wise with `stride`.
//   stride          - distance in elements between consecutive input rows.
//   coeff_ptr       - receives the 64 transform coefficients.
//   n_coeffs        - number of coefficients to clear and quantize.
//   round_ptr, quant_ptr, dequant_ptr
//                   - two-entry tables; entry 0 is used for position 0,
//                     entry 1 for every other position.
//   quant_shift_ptr - explicitly discarded.
//   qcoeff_ptr      - receives the quantized coefficients.
//   dqcoeff_ptr     - receives the dequantized coefficients.
//   scan            - maps scan index i to coefficient position rc.
//   zbin_ptr, iscan - not referenced by any visible line.
// NOTE(review): some parameter lines, the declarations of the loop counters,
// the assignments to x0..x3, the per-iteration pointer updates of the first
// pass, the tail of the quantization loop and all closing braces are absent
// from this listing (the embedded numbering has gaps) - confirm against the
// complete file.
562 void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
563 tran_low_t *coeff_ptr, intptr_t n_coeffs,
565 const int16_t *zbin_ptr, const int16_t *round_ptr,
566 const int16_t *quant_ptr,
567 const int16_t *quant_shift_ptr,
568 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
569 const int16_t *dequant_ptr,
571 const int16_t *scan, const int16_t *iscan) {
// Scratch buffer that holds the result of the first (column) pass.
575 tran_low_t intermediate[64];
579 tran_low_t *output = intermediate;
580 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
581 tran_high_t t0, t1, t2, t3; // needs32
582 tran_high_t x0, x1, x2, x3; // canbe16
// First pass: an inlined 8-point transform per column. Inputs are read at
// multiples of `stride` and multiplied by 4; results are written at
// multiples of 8 into `intermediate`.
585 for (i = 0; i < 8; i++) {
587 s0 = (input[0 * stride] + input[7 * stride]) * 4;
588 s1 = (input[1 * stride] + input[6 * stride]) * 4;
589 s2 = (input[2 * stride] + input[5 * stride]) * 4;
590 s3 = (input[3 * stride] + input[4 * stride]) * 4;
591 s4 = (input[3 * stride] - input[4 * stride]) * 4;
592 s5 = (input[2 * stride] - input[5 * stride]) * 4;
593 s6 = (input[1 * stride] - input[6 * stride]) * 4;
594 s7 = (input[0 * stride] - input[7 * stride]) * 4;
596 // fdct4(step, step);
601 t0 = (x0 + x1) * cospi_16_64;
602 t1 = (x0 - x1) * cospi_16_64;
603 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
604 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
605 output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
606 output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
607 output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
608 output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
// Odd half of the inlined transform (s5/s6 combined with cospi_16_64).
611 t0 = (s6 - s5) * cospi_16_64;
612 t1 = (s6 + s5) * cospi_16_64;
613 t2 = fdct_round_shift(t0);
614 t3 = fdct_round_shift(t1);
623 t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
624 t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
625 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
626 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
627 output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
628 output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
629 output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
630 output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
// Second pass: fdct8() on each 8-element row of `intermediate`, written
// straight into coeff_ptr, then every coefficient of that row is halved
// (C integer division, so the quotient truncates toward zero).
637 for (i = 0; i < 8; ++i) {
638 fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
639 for (j = 0; j < 8; ++j)
640 coeff_ptr[j + i * 8] /= 2;
643 // TODO(jingning) Decide the need of these arguments after the
644 // quantization process is completed.
646 (void)quant_shift_ptr;
// Start from all-zero quantized and dequantized outputs.
649 memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
650 memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
// NOTE(review): the next comment mentions `zero_flag`, but no such name
// appears in the visible code - likely stale; confirm.
653 // Quantization pass: All coefficients with index >= zero_flag are
654 // skippable. Note: zero_flag can be zero.
655 for (i = 0; i < n_coeffs; i++) {
656 const int rc = scan[i];
657 const int coeff = coeff_ptr[rc];
// coeff_sign is 0 for non-negative coeff and -1 for negative coeff (this
// relies on an arithmetic right shift of a 32-bit int); (v ^ s) - s then
// yields the absolute value, and the same expression restores the sign
// below.
658 const int coeff_sign = (coeff >> 31);
659 const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
// Add the rounding offset, clamp to the int16 range, multiply by the
// quantizer and keep the part above the low 16 bits. `rc != 0` selects
// table entry 0 for position 0 and entry 1 otherwise.
661 int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
662 tmp = (tmp * quant_ptr[rc != 0]) >> 16;
664 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
665 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
// vp9_fht8x8_c: 8x8 forward hybrid transform.
//   input   - source samples; element (row j, column i) is read from
//             input[j * stride + i].
//   output  - receives 64 coefficients, stored with a row pitch of 8.
//   stride  - distance in elements between consecutive input rows.
//   tx_type - DCT_DCT is delegated to vpx_fdct8x8_c(); any other value is
//             used as an index into FHT_8.
// NOTE(review): the else-branch braces, the declarations of `out` and the
// loop counters, and the closing braces are absent from this listing (the
// embedded numbering has gaps) - confirm against the complete file.
674 void vp9_fht8x8_c(const int16_t *input, tran_low_t *output,
675 int stride, int tx_type) {
676 if (tx_type == DCT_DCT) {
677 vpx_fdct8x8_c(input, output, stride);
681 tran_low_t temp_in[8], temp_out[8];
682 const transform_2d ht = FHT_8[tx_type];
// Column pass: gather column i (each sample multiplied by 4), transform it
// with ht.cols and store the result as column i of `out`.
685 for (i = 0; i < 8; ++i) {
686 for (j = 0; j < 8; ++j)
687 temp_in[j] = input[j * stride + i] * 4;
688 ht.cols(temp_in, temp_out);
689 for (j = 0; j < 8; ++j)
690 out[j * 8 + i] = temp_out[j];
// Row pass: transform each row of `out` with ht.rows, then halve the result;
// adding 1 to negative values first makes the shift round toward zero.
694 for (i = 0; i < 8; ++i) {
695 for (j = 0; j < 8; ++j)
696 temp_in[j] = out[j + i * 8];
697 ht.rows(temp_in, temp_out);
698 for (j = 0; j < 8; ++j)
699 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
704 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */
// vp9_fwht4x4_c: 4x4 forward Walsh-Hadamard transform in two passes.
//   input  - source samples; the first pass reads four values spaced
//            `stride` elements apart.
//   output - receives 16 coefficients; the first pass writes at offsets
//            0, 4, 8, 12 from `op`, the second at offsets 0..3.
//   stride - distance in elements between consecutive input rows.
// NOTE(review): the arithmetic between the loads and the stores, the loads
// of the second pass, the pointer updates of both passes and all closing
// braces are absent from this listing (the embedded numbering has gaps) -
// only the visible loads and stores are documented here.
706 void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
708 tran_high_t a1, b1, c1, d1, e1;
709 const int16_t *ip_pass0 = input;
710 const tran_low_t *ip = NULL;
711 tran_low_t *op = output;
// First pass: load four samples spaced `stride` apart.
713 for (i = 0; i < 4; i++) {
714 a1 = ip_pass0[0 * stride];
715 b1 = ip_pass0[1 * stride];
716 c1 = ip_pass0[2 * stride];
717 d1 = ip_pass0[3 * stride];
// Results are stored in the order a1, c1, d1, b1 with a pitch of 4.
726 op[0] = (tran_low_t)a1;
727 op[4] = (tran_low_t)c1;
728 op[8] = (tran_low_t)d1;
729 op[12] = (tran_low_t)b1;
// Second pass: same a1, c1, d1, b1 store order, now contiguous and
// multiplied by UNIT_QUANT_FACTOR (defined outside this section).
737 for (i = 0; i < 4; i++) {
750 op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);
751 op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
752 op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
753 op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);
// vp9_fht16x16_c: 16x16 forward hybrid transform.
//   input   - source samples; element (row j, column i) is read from
//             input[j * stride + i].
//   output  - receives 256 coefficients, stored with a row pitch of 16.
//   stride  - distance in elements between consecutive input rows.
//   tx_type - DCT_DCT is delegated to vpx_fdct16x16_c(); any other value is
//             used as an index into FHT_16.
// NOTE(review): the else-branch braces, the declarations of `out` and the
// loop counters, and the closing braces are absent from this listing (the
// embedded numbering has gaps) - confirm against the complete file.
760 void vp9_fht16x16_c(const int16_t *input, tran_low_t *output,
761 int stride, int tx_type) {
762 if (tx_type == DCT_DCT) {
763 vpx_fdct16x16_c(input, output, stride);
767 tran_low_t temp_in[16], temp_out[16];
768 const transform_2d ht = FHT_16[tx_type];
// Column pass: gather column i (each sample multiplied by 4), transform it
// with ht.cols, then scale down by 4 between the passes: add 1 (2 for
// negative values) and shift right by 2.
771 for (i = 0; i < 16; ++i) {
772 for (j = 0; j < 16; ++j)
773 temp_in[j] = input[j * stride + i] * 4;
774 ht.cols(temp_in, temp_out);
775 for (j = 0; j < 16; ++j)
776 out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
// Row pass: transform each row of `out` with ht.rows and store it unscaled.
780 for (i = 0; i < 16; ++i) {
781 for (j = 0; j < 16; ++j)
782 temp_in[j] = out[j + i * 16];
783 ht.rows(temp_in, temp_out);
784 for (j = 0; j < 16; ++j)
785 output[j + i * 16] = temp_out[j];
// The functions from here to the matching #endif are compiled only when
// CONFIG_VP9_HIGHBITDEPTH is non-zero.
790 #if CONFIG_VP9_HIGHBITDEPTH
// vp9_highbd_fht4x4_c: forwards all four arguments unchanged to
// vp9_fht4x4_c(). (The closing brace is absent from this listing.)
791 void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output,
792 int stride, int tx_type) {
793 vp9_fht4x4_c(input, output, stride, tx_type);
// vp9_highbd_fht8x8_c: forwards all four arguments unchanged to
// vp9_fht8x8_c(). (The closing brace is absent from this listing.)
796 void vp9_highbd_fht8x8_c(const int16_t *input, tran_low_t *output,
797 int stride, int tx_type) {
798 vp9_fht8x8_c(input, output, stride, tx_type);
// vp9_highbd_fwht4x4_c: forwards input, output and stride to
// vp9_fwht4x4_c().
// NOTE(review): the end of the parameter list (where `stride` must be
// declared) and the closing brace are absent from this listing - confirm
// against the complete file.
801 void vp9_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
803 vp9_fwht4x4_c(input, output, stride);
// vp9_highbd_fht16x16_c: forwards all four arguments unchanged to
// vp9_fht16x16_c(). (The closing brace is absent from this listing.)
806 void vp9_highbd_fht16x16_c(const int16_t *input, tran_low_t *output,
807 int stride, int tx_type) {
808 vp9_fht16x16_c(input, output, stride, tx_type);
810 #endif // CONFIG_VP9_HIGHBITDEPTH