--- /dev/null
+--- libmpeg2-0.5.1/libmpeg2/idct_mmx.c.orig 2011-05-12 17:30:42.028201708 -0300
++++ libmpeg2-0.5.1/libmpeg2/idct_mmx.c 2011-05-12 17:35:50.084201548 -0300
+@@ -39,6 +39,11 @@
+ #define rounder(bias) {round (bias), round (bias)}
+ #define rounder_sse2(bias) {round (bias), round (bias), round (bias), round (bias)}
+
++#if defined(__GNUC__)
++# define ATTR_VECTOR(n) __attribute__((vector_size(n)))
++#else
++# define ATTR_VECTOR(n) /**/
++#endif
+
+ #if 0
+ /* C row IDCT - it is just here to document the MMXEXT and MMX versions */
+@@ -464,10 +469,10 @@ static inline void sse2_idct_col (int16_
+ /* Almost identical to mmxext version: */
+ /* just do both 4x8 columns in paraller */
+
+- static const short t1_vector[] ATTR_ALIGN(16) = {T1,T1,T1,T1,T1,T1,T1,T1};
+- static const short t2_vector[] ATTR_ALIGN(16) = {T2,T2,T2,T2,T2,T2,T2,T2};
+- static const short t3_vector[] ATTR_ALIGN(16) = {T3,T3,T3,T3,T3,T3,T3,T3};
+- static const short c4_vector[] ATTR_ALIGN(16) = {C4,C4,C4,C4,C4,C4,C4,C4};
++ static const short t1_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {T1,T1,T1,T1,T1,T1,T1,T1};
++ static const short t2_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {T2,T2,T2,T2,T2,T2,T2,T2};
++ static const short t3_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {T3,T3,T3,T3,T3,T3,T3,T3};
++ static const short c4_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {C4,C4,C4,C4,C4,C4,C4,C4};
+
+ #if defined(__x86_64__)
+
+@@ -710,10 +715,10 @@ static inline void sse2_idct_col (int16_
+ /* MMX column IDCT */
+ static inline void idct_col (int16_t * const col, const int offset)
+ {
+- static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
+- static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
+- static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
+- static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
++ static const short t1_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {T1,T1,T1,T1};
++ static const short t2_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {T2,T2,T2,T2};
++ static const short t3_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {T3,T3,T3,T3};
++ static const short c4_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {C4,C4,C4,C4};
+
+ /* column code adapted from peter gubanov */
+ /* http://www.elecard.com/peter/idct.shtml */
+@@ -847,33 +852,33 @@ static inline void idct_col (int16_t * c
+ }
+
+
+-static const int32_t rounder0[] ATTR_ALIGN(8) =
++static const int32_t rounder0[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
+ rounder ((1 << (COL_SHIFT - 1)) - 0.5);
+-static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
+-static const int32_t rounder1[] ATTR_ALIGN(8) =
++static const int32_t rounder4[] ATTR_ALIGN(8) ATTR_VECTOR(16) = rounder (0);
++static const int32_t rounder1[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
+ rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
+-static const int32_t rounder7[] ATTR_ALIGN(8) =
++static const int32_t rounder7[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
+ rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
+-static const int32_t rounder2[] ATTR_ALIGN(8) =
++static const int32_t rounder2[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
+ rounder (0.60355339059); /* C2 * (C6+C2)/2 */
+-static const int32_t rounder6[] ATTR_ALIGN(8) =
++static const int32_t rounder6[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
+ rounder (-0.25); /* C2 * (C6-C2)/2 */
+-static const int32_t rounder3[] ATTR_ALIGN(8) =
++static const int32_t rounder3[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
+ rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
+-static const int32_t rounder5[] ATTR_ALIGN(8) =
++static const int32_t rounder5[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
+ rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
+
+
+ #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
+ static inline void idct (int16_t * const block) \
+ { \
+- static const int16_t table04[] ATTR_ALIGN(16) = \
++ static const int16_t table04[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
+ table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
+- static const int16_t table17[] ATTR_ALIGN(16) = \
++ static const int16_t table17[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
+ table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
+- static const int16_t table26[] ATTR_ALIGN(16) = \
++ static const int16_t table26[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
+ table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
+- static const int16_t table35[] ATTR_ALIGN(16) = \
++ static const int16_t table35[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
+ table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
+ \
+ idct_row_head (block, 0*8, table04); \
+@@ -900,29 +905,30 @@ static inline void idct (int16_t * const
+
+ static inline void sse2_idct (int16_t * const block)
+ {
+- static const int16_t table04[] ATTR_ALIGN(16) =
++ static const int16_t table04[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
+ sse2_table (22725, 21407, 19266, 16384, 12873, 8867, 4520);
+- static const int16_t table17[] ATTR_ALIGN(16) =
++ static const int16_t table17[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
+ sse2_table (31521, 29692, 26722, 22725, 17855, 12299, 6270);
+- static const int16_t table26[] ATTR_ALIGN(16) =
++ static const int16_t table26[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
+ sse2_table (29692, 27969, 25172, 21407, 16819, 11585, 5906);
+- static const int16_t table35[] ATTR_ALIGN(16) =
++ static const int16_t table35[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
+ sse2_table (26722, 25172, 22654, 19266, 15137, 10426, 5315);
+
+- static const int32_t rounder0_128[] ATTR_ALIGN(16) =
++ static const int32_t rounder0_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 ((1 << (COL_SHIFT - 1)) - 0.5);
+- static const int32_t rounder4_128[] ATTR_ALIGN(16) = rounder_sse2 (0);
+- static const int32_t rounder1_128[] ATTR_ALIGN(16) =
++ static const int32_t rounder4_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
++ rounder_sse2 (0);
++ static const int32_t rounder1_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
+- static const int32_t rounder7_128[] ATTR_ALIGN(16) =
++ static const int32_t rounder7_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 (-0.25); /* C1*(C7/C4+C7-C1)/2 */
+- static const int32_t rounder2_128[] ATTR_ALIGN(16) =
++ static const int32_t rounder2_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 (0.60355339059); /* C2 * (C6+C2)/2 */
+- static const int32_t rounder6_128[] ATTR_ALIGN(16) =
++ static const int32_t rounder6_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 (-0.25); /* C2 * (C6-C2)/2 */
+- static const int32_t rounder3_128[] ATTR_ALIGN(16) =
++ static const int32_t rounder3_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
+- static const int32_t rounder5_128[] ATTR_ALIGN(16) =
++ static const int32_t rounder5_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
+
+ #if defined(__x86_64__)