arm: x264_plane_copy_deinterleave_rgb_neon

author Janne Grunau <janne-x264@jannau.net>

Sat, 15 Mar 2014 18:21:12 +0000 (19:21 +0100)

committer Fiona Glaser <fiona@x264.com>

Tue, 22 Apr 2014 22:37:49 +0000 (15:37 -0700)
author Janne Grunau <janne-x264@jannau.net>
Sat, 15 Mar 2014 18:21:12 +0000 (19:21 +0100)
committer Fiona Glaser <fiona@x264.com>
Tue, 22 Apr 2014 22:37:49 +0000 (15:37 -0700)
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S

index df9e2fb6c885e11388a70cd0638c9f10cca7a426..e9a5f863c846dff91b5f74e46f1204a793972fad 100644 (file)
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1517,3 +1517,55 @@ block:
  
      pop             {r4-r7, pc}
  .endfunc
+
+function x264_plane_copy_deinterleave_rgb_neon  // split packed 3- or 4-byte pixels from src into three byte planes
+    push            {r4-r8, r10, r11, lr}        // 8 registers = 32 bytes pushed, so stack arguments start at sp+32
+    ldrd            r4,  r5,  [sp, #32]          // r4 = dstc, r5 = i_dstc (r0-r3 already hold dsta, i_dsta, dstb, i_dstb)
+    ldrd            r6,  r7,  [sp, #40]          // r6 = src,  r7 = i_src
+    ldr             r8,  [sp, #48]               // r8 = pw
+    ldrd            r10, r11, [sp, #52]          // r10 = w,   r11 = h
+    add             lr,  r10, #7
+    subs            r8,  r8,  #3                 // Z set iff pw == 3; nothing below touches flags until subne/subeq/bne
+    bic             lr,  lr,  #7                 // lr = w rounded up to a multiple of 8 = bytes written per plane row
+    sub             r7,  r7,  lr, lsl #1         // r7 = i_src - 2*lr; the rest is subtracted below depending on pw
+    sub             r1,  r1,  lr                 // turn each dst stride into the gap from end of row to start of next row
+    sub             r3,  r3,  lr
+    sub             r5,  r5,  lr
+    subne           r7,  r7,  lr, lsl #1         // pw != 3: r7 = i_src - 4*lr
+    subeq           r7,  r7,  lr                 // pw == 3: r7 = i_src - 3*lr
+    bne             block4                       // every pw other than 3 is handled as 4 bytes per pixel
+block3:                                          // 3 bytes per source pixel
+    vld3.8          {d0,d1,d2}, [r6]!            // load 24 bytes = 8 pixels, de-interleaved: byte k of each pixel lands in dk
+    subs            lr,  lr,  #8                 // 8 pixels consumed
+    vst1.8          {d0},    [r0]!               // byte 0 of each pixel -> dsta
+    vst1.8          {d1},    [r2]!               // byte 1 of each pixel -> dstb
+    vst1.8          {d2},    [r4]!               // byte 2 of each pixel -> dstc
+    bgt             block3                       // tests the subs above; loop is bottom-tested, so it runs at least once
+
+    subs            r11, r11, #1                 // end of row: one fewer row remaining
+    add             r0,  r0,  r1                 // advance all four pointers to the start of the next row
+    add             r2,  r2,  r3
+    add             r4,  r4,  r5
+    add             r6,  r6,  r7
+    mov             lr,  r10                     // reload with unrounded w; looping while lr > 0 still gives ceil(w/8) iterations
+    bgt             block3                       // flags still come from subs r11 (add/mov without s leave them alone)
+
+    pop             {r4-r8, r10, r11, pc}        // restore registers and return by popping the saved lr into pc
+block4:                                          // 4 bytes per source pixel
+    vld4.8          {d0,d1,d2,d3}, [r6]!         // load 32 bytes = 8 pixels, de-interleaved; d3 (byte 3) is never stored
+    subs            lr,  lr,  #8                 // 8 pixels consumed
+    vst1.8          {d0},    [r0]!               // byte 0 of each pixel -> dsta
+    vst1.8          {d1},    [r2]!               // byte 1 of each pixel -> dstb
+    vst1.8          {d2},    [r4]!               // byte 2 of each pixel -> dstc
+    bgt             block4                       // tests the subs above; loop is bottom-tested, so it runs at least once
+
+    subs            r11, r11, #1                 // end of row: one fewer row remaining
+    add             r0,  r0,  r1                 // advance all four pointers to the start of the next row
+    add             r2,  r2,  r3
+    add             r4,  r4,  r5
+    add             r6,  r6,  r7
+    mov             lr,  r10                     // reload with unrounded w; looping while lr > 0 still gives ceil(w/8) iterations
+    bgt             block4                       // flags still come from subs r11 (add/mov without s leave them alone)
+
+    pop             {r4-r8, r10, r11, pc}        // restore registers and return by popping the saved lr into pc
+.endfunc
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c

index e50d7364188e86a4fdb679cdf0a8a66d3fc0ebaa..48b868e4a24cb97ffb20499fb11ee8959ea52a76 100644 (file)
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -50,6 +50,10 @@ void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
  void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
                                           pixel *dstv, intptr_t i_dstv,
                                           pixel *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta, // plane receiving byte 0 of every source pixel, and its row stride
+                                            pixel *dstb, intptr_t i_dstb, // plane receiving byte 1, and its row stride
+                                            pixel *dstc, intptr_t i_dstc, // plane receiving byte 2, and its row stride
+                                            pixel *src,  intptr_t i_src, int pw, int w, int h ); // packed source and stride; pw = bytes per source pixel (3, anything else is handled as 4); w x h in pixels
  
  void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
  void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -233,6 +237,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
      pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_neon;
  
      pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
  
      pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
      pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
author	Janne Grunau <janne-x264@jannau.net>
	Sat, 15 Mar 2014 18:21:12 +0000 (19:21 +0100)
committer	Fiona Glaser <fiona@x264.com>
	Tue, 22 Apr 2014 22:37:49 +0000 (15:37 -0700)
common/arm/mc-a.S		patch \| blob \| history
common/arm/mc-c.c		patch \| blob \| history