granicus.if.org Git - libvpx/commitdiff
highbd_variance_neon,cosmetics: reorder a few lines
author James Zern <jzern@google.com>
Sat, 27 Aug 2022 05:12:44 +0000 (22:12 -0700)
committer James Zern <jzern@google.com>
Sat, 27 Aug 2022 05:12:44 +0000 (22:12 -0700)
Change-Id: Ia6fa54652d7f94687e64108482bb0f28ca06cf49

vpx_dsp/arm/highbd_variance_neon.c

index 3a60a14ab84681c13e6b6f4f643c5dc87776f2fc..96a35af01c2000d443f6ab23b686e74353afa5aa 100644 (file)
@@ -233,14 +233,12 @@ static INLINE void highbd_var_filter_block2d_bil_first_pass(
   if (output_width >= 8) {
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 8) {
-        uint32x4_t sum1_u32;
-        uint32x4_t sum2_u32;
-        uint16x4_t out1_u16;
-        uint16x4_t out2_u16;
         const uint16x8_t src1_u16 = vld1q_u16(&src_ptr[j]);
         const uint16x8_t src2_u16 = vld1q_u16(&src_ptr[j + pixel_step]);
-        sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
-        sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint32x4_t sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
+        uint32x4_t sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint16x4_t out1_u16;
+        uint16x4_t out2_u16;
         sum1_u32 = vmlal_u16(sum1_u32, filter2_u16, vget_low_u16(src2_u16));
         sum2_u32 = vmlal_u16(sum2_u32, filter2_u16, vget_high_u16(src2_u16));
         out1_u16 = vshrn_n_u32(vaddq_u32(sum1_u32, round_u32), FILTER_BITS);
@@ -255,11 +253,10 @@ static INLINE void highbd_var_filter_block2d_bil_first_pass(
     assert(output_width >= 4);
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 4) {
-        uint32x4_t sum_u32;
-        uint16x4_t out_u16;
         const uint16x4_t src1_u16 = vld1_u16(&src_ptr[j]);
         const uint16x4_t src2_u16 = vld1_u16(&src_ptr[j + pixel_step]);
-        sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint32x4_t sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint16x4_t out_u16;
         sum_u32 = vmlal_u16(sum_u32, filter2_u16, src2_u16);
         out_u16 = vshrn_n_u32(vaddq_u32(sum_u32, round_u32), FILTER_BITS);
         vst1_u16(&output_ptr[j], out_u16);
@@ -285,14 +282,12 @@ static INLINE void highbd_var_filter_block2d_bil_second_pass(
   if (output_width >= 8) {
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 8) {
-        uint32x4_t sum1_u32;
-        uint32x4_t sum2_u32;
-        uint16x4_t out1_u16;
-        uint16x4_t out2_u16;
         const uint16x8_t src1_u16 = vld1q_u16(&src_ptr[j]);
         const uint16x8_t src2_u16 = vld1q_u16(&src_ptr[j + pixel_step]);
-        sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
-        sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint32x4_t sum1_u32 = vmull_u16(filter1_u16, vget_low_u16(src1_u16));
+        uint32x4_t sum2_u32 = vmull_u16(filter1_u16, vget_high_u16(src1_u16));
+        uint16x4_t out1_u16;
+        uint16x4_t out2_u16;
         sum1_u32 = vmlal_u16(sum1_u32, filter2_u16, vget_low_u16(src2_u16));
         sum2_u32 = vmlal_u16(sum2_u32, filter2_u16, vget_high_u16(src2_u16));
         out1_u16 = vshrn_n_u32(vaddq_u32(sum1_u32, round_u32), FILTER_BITS);
@@ -307,11 +302,10 @@ static INLINE void highbd_var_filter_block2d_bil_second_pass(
     assert(output_width >= 4);
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; j += 4) {
-        uint32x4_t sum_u32;
-        uint16x4_t out_u16;
         const uint16x4_t src1_u16 = vld1_u16(&src_ptr[j]);
         const uint16x4_t src2_u16 = vld1_u16(&src_ptr[j + pixel_step]);
-        sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint32x4_t sum_u32 = vmull_u16(filter1_u16, src1_u16);
+        uint16x4_t out_u16;
         sum_u32 = vmlal_u16(sum_u32, filter2_u16, src2_u16);
         out_u16 = vshrn_n_u32(vaddq_u32(sum_u32, round_u32), FILTER_BITS);
         vst1_u16(&output_ptr[j], out_u16);