/* Pass 1: process rows */
- row0 = *(__vector short *)&data[0];
- row1 = *(__vector short *)&data[8];
- row2 = *(__vector short *)&data[16];
- row3 = *(__vector short *)&data[24];
- row4 = *(__vector short *)&data[32];
- row5 = *(__vector short *)&data[40];
- row6 = *(__vector short *)&data[48];
- row7 = *(__vector short *)&data[56];
+ row0 = vec_ld(0, data);
+ row1 = vec_ld(16, data);
+ row2 = vec_ld(32, data);
+ row3 = vec_ld(48, data);
+ row4 = vec_ld(64, data);
+ row5 = vec_ld(80, data);
+ row6 = vec_ld(96, data);
+ row7 = vec_ld(112, data);
TRANSPOSE(row, col);
DO_FDCT();
- *(__vector short *)&data[0] = out0;
- *(__vector short *)&data[8] = out1;
- *(__vector short *)&data[16] = out2;
- *(__vector short *)&data[24] = out3;
- *(__vector short *)&data[32] = out4;
- *(__vector short *)&data[40] = out5;
- *(__vector short *)&data[48] = out6;
- *(__vector short *)&data[56] = out7;
+ vec_st(out0, 0, data);
+ vec_st(out1, 16, data);
+ vec_st(out2, 32, data);
+ vec_st(out3, 48, data);
+ vec_st(out4, 64, data);
+ vec_st(out5, 80, data);
+ vec_st(out6, 96, data);
+ vec_st(out7, 112, data);
}
/* Pass 1: process rows */
- row0 = *(__vector short *)&data[0];
- row1 = *(__vector short *)&data[8];
- row2 = *(__vector short *)&data[16];
- row3 = *(__vector short *)&data[24];
- row4 = *(__vector short *)&data[32];
- row5 = *(__vector short *)&data[40];
- row6 = *(__vector short *)&data[48];
- row7 = *(__vector short *)&data[56];
+ row0 = vec_ld(0, data);
+ row1 = vec_ld(16, data);
+ row2 = vec_ld(32, data);
+ row3 = vec_ld(48, data);
+ row4 = vec_ld(64, data);
+ row5 = vec_ld(80, data);
+ row6 = vec_ld(96, data);
+ row7 = vec_ld(112, data);
TRANSPOSE(row, col);
DO_FDCT_COLS();
- *(__vector short *)&data[0] = out0;
- *(__vector short *)&data[8] = out1;
- *(__vector short *)&data[16] = out2;
- *(__vector short *)&data[24] = out3;
- *(__vector short *)&data[32] = out4;
- *(__vector short *)&data[40] = out5;
- *(__vector short *)&data[48] = out6;
- *(__vector short *)&data[56] = out7;
+ vec_st(out0, 0, data);
+ vec_st(out1, 16, data);
+ vec_st(out2, 32, data);
+ vec_st(out3, 48, data);
+ vec_st(out4, 64, data);
+ vec_st(out5, 80, data);
+ vec_st(out6, 96, data);
+ vec_st(out7, 112, data);
}
z5, z10, z10s, z11, z12s, z13,
out0, out1, out2, out3, out4, out5, out6, out7;
__vector signed char outb;
- long long *outptr, *outbptr = (long long *)(&outb);
+ int *outptr;
/* Constants */
__vector short zero = { __8X(0) },
/* Pass 1: process columns */
- col0 = *(__vector short *)&coef_block[0];
- col1 = *(__vector short *)&coef_block[8];
- col2 = *(__vector short *)&coef_block[16];
- col3 = *(__vector short *)&coef_block[24];
- col4 = *(__vector short *)&coef_block[32];
- col5 = *(__vector short *)&coef_block[40];
- col6 = *(__vector short *)&coef_block[48];
- col7 = *(__vector short *)&coef_block[56];
+ col0 = vec_ld(0, coef_block);
+ col1 = vec_ld(16, coef_block);
+ col2 = vec_ld(32, coef_block);
+ col3 = vec_ld(48, coef_block);
+ col4 = vec_ld(64, coef_block);
+ col5 = vec_ld(80, coef_block);
+ col6 = vec_ld(96, coef_block);
+ col7 = vec_ld(112, coef_block);
tmp1 = vec_or(col1, col2);
tmp2 = vec_or(col3, col4);
TRANSPOSE(out, col);
- outb = vec_packs(col0, col1);
+ outb = vec_packs(col0, col0);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[0] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[1] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[0] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
- outb = vec_packs(col2, col3);
+ outb = vec_packs(col1, col1);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[2] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[3] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[1] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
- outb = vec_packs(col4, col5);
+ outb = vec_packs(col2, col2);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[4] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[5] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[2] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
- outb = vec_packs(col6, col7);
+ outb = vec_packs(col3, col3);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[6] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[7] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[3] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col4, col4);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[4] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col5, col5);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[5] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col6, col6);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[6] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col7, col7);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[7] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
}
out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h,
out5l, out5h, out6l, out6h, out7l, out7h;
__vector signed char outb;
- long long *outptr, *outbptr = (long long *)(&outb);
+ int *outptr;
/* Constants */
__vector short zero16 = { __8X(0) },
TRANSPOSE(out, col);
- outb = vec_packs(col0, col1);
+ outb = vec_packs(col0, col0);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[0] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[1] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[0] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
- outb = vec_packs(col2, col3);
+ outb = vec_packs(col1, col1);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[2] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[3] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[1] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
- outb = vec_packs(col4, col5);
+ outb = vec_packs(col2, col2);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[4] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[5] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[2] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
- outb = vec_packs(col6, col7);
+ outb = vec_packs(col3, col3);
outb = vec_add(outb, pb_centerjsamp);
- outptr = (long long *)(output_buf[6] + output_col);
- *outptr = outbptr[0];
- outptr = (long long *)(output_buf[7] + output_col);
- *outptr = outbptr[1];
+ outptr = (int *)(output_buf[3] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col4, col4);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[4] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col5, col5);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[5] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col6, col6);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[6] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
+
+ outb = vec_packs(col7, col7);
+ outb = vec_add(outb, pb_centerjsamp);
+ outptr = (int *)(output_buf[7] + output_col);
+ vec_ste((__vector int)outb, 0, outptr);
+ vec_ste((__vector int)outb, 4, outptr);
}