* Loongson MMI optimizations for libjpeg-turbo
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2015, D. R. Commander. All Rights Reserved.
- * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
+ * Copyright (C) 2015, 2019, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
* Authors: ZhuChen <zhuchen@loongson.cn>
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
mmC = _mm_unpacklo_pi32(mmC, mmF); /* mmC=(15 25 06 16 26 07 17 27) */
if (num_cols >= 8) {
- _mm_store_si64((__m64 *)outptr, mmA);
- _mm_store_si64((__m64 *)(outptr + 8), mmE);
- _mm_store_si64((__m64 *)(outptr + 16), mmC);
+ if (!(((long)outptr) & 7)) {
+ _mm_store_si64((__m64 *)outptr, mmA);
+ _mm_store_si64((__m64 *)(outptr + 8), mmE);
+ _mm_store_si64((__m64 *)(outptr + 16), mmC);
+ } else {
+ _mm_storeu_si64((__m64 *)outptr, mmA);
+ _mm_storeu_si64((__m64 *)(outptr + 8), mmE);
+ _mm_storeu_si64((__m64 *)(outptr + 16), mmC);
+ }
outptr += RGB_PIXELSIZE * 8;
} else {
col = num_cols * 3;
extern __inline void FUNCTION_ATTRIBS
_mm_store_si64(__m64 *dest, __m64 src)
{
- asm("gssdlc1 %1, 7+%0\n\t"
- "gssdrc1 %1, %0\n\t"
+ asm("sdc1 %1, %0 \n\t"
: "=m" (*dest)
: "f" (src)
: "memory"
);
}
+extern __inline void FUNCTION_ATTRIBS
+_mm_storeu_si64(__m64 *dest, __m64 src)
+{
+ asm("gssdlc1 %1, 7(%0) \n\t"
+ "gssdrc1 %1, 0(%0) \n\t"
+ :
+ : "r" (dest), "f" (src)
+ : "memory"
+ );
+}
+
extern __inline __m64 FUNCTION_ATTRIBS
_mm_load_si32(const __m32 *src)
{