granicus.if.org Git - libx264/commitdiff
arm: Load mb_y properly in mbtree_propagate_list_internal_neon
author: Martin Storsjö <martin@martin.st>
Mon, 26 Dec 2016 22:22:48 +0000 (00:22 +0200)
committer: Henrik Gramner <henrik@gramner.com>
Thu, 29 Dec 2016 22:06:12 +0000 (23:06 +0100)
The previous version, which attempted to load two stack parameters at once,
would only have worked if they were interpreted and loaded as 32-bit
elements, not when loaded as 16-bit elements. The fix loads bipred_weight
alone from [sp] and reads mb_y separately from [sp, #4].

common/arm/mc-a.S

index 165c1fa92979f316190cfdc09bf0d729494c9cb0..8c151915a40d1b21ffe6514e50d6c685af46b08c 100644 (file)
@@ -1818,13 +1818,14 @@ function x264_mbtree_propagate_cost_neon
 endfunc
 
 function x264_mbtree_propagate_list_internal_neon
-    vld2.16         {d4[], d5[]}, [sp]      @ bipred_weight, mb_y
+    vld1.16         {d4[]}, [sp]            @ bipred_weight
     movrel          r12, pw_0to15
     vmov.u16        q10, #0xc000
     vld1.16         {q0},  [r12, :128]      @h->mb.i_mb_x,h->mb.i_mb_y
+    ldrh            r12,  [sp, #4]
     vmov.u32        q11, #4
     vmov.u8         q3,  #32
-    vdup.u16        q8,  d5[0]              @ mb_y
+    vdup.u16        q8,  r12                @ mb_y
     vzip.u16        q0,  q8
     ldr             r12, [sp, #8]
 8: