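Scale the input of the first-stage (column) transform up by a factor
of 4 (left shift by 2), then scale its output back down by 4 with
proper rounding before applying the second-stage (row) transform.
The intermediate values therefore still fit in the given dynamic
range, and the added first-stage precision slightly improves the
key frame coding performance.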
Change-Id: Ia4c5907446f20a95dc3de079c314b3ad1221d8aa
// column transform
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
- temp_in[j] = input[j * short_pitch + i];
+ temp_in[j] = input[j * short_pitch + i] << 2;
fwdc(temp_in, temp_out);
for (j = 0; j < 16; ++j)
- outptr[j * 16 + i] = temp_out[j];
+ outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
// row transform