]> granicus.if.org Git - libx264/commitdiff
x86: AVX-512 load_deinterleave_chroma_fdec
author: Henrik Gramner <henrik@gramner.com>
Sun, 8 Oct 2017 19:41:16 +0000 (21:41 +0200)
committer: Anton Mitrofanov <BugMaster@narod.ru>
Sun, 24 Dec 2017 20:47:26 +0000 (23:47 +0300)
common/x86/mc-a2.asm
common/x86/mc-c.c

index 69ed4cd4b924a38a38db9ffd0c4994fcfab1b655..90f5419e536ab7718d85aff235b69934f96d84ac 100644 (file)
@@ -1259,6 +1259,29 @@ cglobal load_deinterleave_chroma_fdec, 4,4
     RET
 %endmacro ; LOAD_DEINTERLEAVE_CHROMA
 
+%macro LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512 0 ; loads 4 source rows per iteration, byte-shuffles them and does masked stores to r0
+cglobal load_deinterleave_chroma_fdec, 4,5 ; r0 = store ptr, r1 = load ptr, r2 = load stride, r3d = row counter, r4 = scratch (presumably dst, src, i_src, height of the C prototype)
+    vbroadcasti32x8 m0, [deinterleave_shuf32a] ; 32 bytes of pshufb control replicated to both 256-bit halves (table not shown here; presumably lane 0 picks one plane and lane 1 the other - TODO confirm)
+    mov            r4d, 0x3333ff00 ; two masks packed in one constant: low word 0xff00 (load merge), high word 0x3333 (store)
+    kmovd           k1, r4d ; k1 = 0x3333ff00; the 16-dword broadcasts below only look at bits 0-15 = 0xff00
+    lea             r4, [r2*3] ; r4 = 3*stride, offset of the 4th row
+    kshiftrd        k2, k1, 16 ; k2 = 0x3333: dwords 0-1 of every 128-bit lane
+.loop: ; one iteration = 4 source rows -> 2*mmsize destination bytes
+    vbroadcasti128 ym1, [r1] ; row 0 (16 bytes) in both lanes of the low 256 bits
+    vbroadcasti32x4 m1 {k1}, [r1+r2] ; merge-masked: row 1 goes to the high 256 bits only, low half keeps row 0
+    vbroadcasti128 ym2, [r1+r2*2] ; row 2 in the low 256 bits
+    vbroadcasti32x4 m2 {k1}, [r1+r4] ; row 3 in the high 256 bits
+    lea             r1, [r1+r2*4] ; advance the load pointer by 4 rows
+    pshufb          m1, m0 ; per-128-bit-lane byte shuffle using the control in m0
+    pshufb          m2, m0 ; same shuffle for rows 2-3
+    vmovdqa32 [r0] {k2}, m1 ; aligned masked store: writes only the low 8 bytes of each 16-byte lane, the other 8 bytes in memory are left untouched
+    vmovdqa32 [r0+mmsize] {k2}, m2 ; same for rows 2-3 in the next mmsize bytes
+    add            r0, 2*mmsize ; advance the store pointer past both vectors
+    sub           r3d, 4 ; 4 rows consumed
+    jg .loop ; repeat while rows remain (signed > 0); NOTE(review): assumes the row count is a multiple of 4 - confirm with callers
+    RET
+%endmacro ; LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512
+
 %macro LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2 0
 cglobal load_deinterleave_chroma_fenc, 4,5
     vbroadcasti128 m0, [deinterleave_shuf]
@@ -1510,6 +1533,7 @@ INIT_YMM avx2
 LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2
 PLANE_DEINTERLEAVE_RGB
 INIT_ZMM avx512
+LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512 ; expands the fdec loader under the INIT_ZMM avx512 setup; presumably yields the _avx512 symbol declared in mc-c.c - TODO confirm
 LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2
 %endif
 
index 0deb13875657f6c7587c9dff74639becf4590d2e..bf697cfc938daf5789c89163ff3609d2db587f31 100644 (file)
@@ -255,6 +255,8 @@ void x264_load_deinterleave_chroma_fdec_ssse3( uint8_t *dst, uint8_t *src, intpt
 void x264_load_deinterleave_chroma_fdec_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
 #define x264_load_deinterleave_chroma_fdec_avx2 x264_template(load_deinterleave_chroma_fdec_avx2)
 void x264_load_deinterleave_chroma_fdec_avx2( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fdec_avx512 x264_template(load_deinterleave_chroma_fdec_avx512)
+void x264_load_deinterleave_chroma_fdec_avx512( uint8_t *dst, uint8_t *src, intptr_t i_src, int height ); /* uint8_t-pixel variant; installed as pf->load_deinterleave_chroma_fdec in x264_mc_init_mmx() */
 #define x264_memcpy_aligned_sse x264_template(memcpy_aligned_sse)
 void *x264_memcpy_aligned_sse   ( void *dst, const void *src, size_t n );
 #define x264_memcpy_aligned_avx x264_template(memcpy_aligned_avx)
@@ -1081,6 +1083,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
         pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_avx512;
         pf->avg[PIXEL_8x8]   = x264_pixel_avg_8x8_avx512;
         pf->avg[PIXEL_8x4]   = x264_pixel_avg_8x4_avx512;
+        pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx512;
         pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx512;
     }
 #endif // HIGH_BIT_DEPTH