]> granicus.if.org Git - libvpx/commitdiff
VPX: removed step checks from mips convolve code
authorScott LaVarnway <slavarnway@google.com>
Thu, 13 Aug 2015 18:27:04 +0000 (11:27 -0700)
committerScott LaVarnway <slavarnway@google.com>
Thu, 13 Aug 2015 18:27:04 +0000 (11:27 -0700)
The check is handled by the predictor table.

Change-Id: I5e5084ebb46be8087c8c9d80b5f76e919a1cd05b

15 files changed:
vpx_dsp/mips/convolve2_avg_dspr2.c
vpx_dsp/mips/convolve2_avg_horiz_dspr2.c
vpx_dsp/mips/convolve2_horiz_dspr2.c
vpx_dsp/mips/convolve2_vert_dspr2.c
vpx_dsp/mips/convolve8_avg_dspr2.c
vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
vpx_dsp/mips/convolve8_dspr2.c
vpx_dsp/mips/convolve8_horiz_dspr2.c
vpx_dsp/mips/convolve8_vert_dspr2.c
vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
vpx_dsp/mips/vpx_convolve8_avg_msa.c
vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
vpx_dsp/mips/vpx_convolve8_horiz_msa.c
vpx_dsp/mips/vpx_convolve8_msa.c
vpx_dsp/mips/vpx_convolve8_vert_msa.c

index f2a1269a2ac878d488091cc75679446c52c51732..3c767672fbd23afdae79668dc6f08c1c20ed34a9 100644 (file)
@@ -233,47 +233,41 @@ void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
-  if (16 == y_step_q4) {
-    uint32_t pos = 38;
-
-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
-
-    prefetch_store(dst);
-
-    switch (w) {
-      case 4:
-      case 8:
-      case 16:
-      case 32:
-        convolve_bi_avg_vert_4_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_y, w, h);
-        break;
-      case 64:
-        prefetch_store(dst + 32);
-        convolve_bi_avg_vert_64_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_y, h);
-        break;
-      default:
-        vpx_convolve8_avg_vert_c(src, src_stride,
-                                 dst, dst_stride,
-                                 filter_x, x_step_q4,
-                                 filter_y, y_step_q4,
-                                 w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_avg_vert_c(src, src_stride,
-                             dst, dst_stride,
-                             filter_x, x_step_q4,
-                             filter_y, y_step_q4,
-                             w, h);
+  uint32_t pos = 38;
+
+  assert(y_step_q4 == 16);
+
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );
+
+  prefetch_store(dst);
+
+  switch (w) {
+    case 4:
+    case 8:
+    case 16:
+    case 32:
+      convolve_bi_avg_vert_4_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_y, w, h);
+      break;
+    case 64:
+      prefetch_store(dst + 32);
+      convolve_bi_avg_vert_64_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_y, h);
+      break;
+    default:
+      vpx_convolve8_avg_vert_c(src, src_stride,
+                               dst, dst_stride,
+                               filter_x, x_step_q4,
+                               filter_y, y_step_q4,
+                               w, h);
+      break;
   }
 }
 #endif
index c031fd44ae9790191d4ad5cb13f30696e6b54195..932a73d39b17beda50b0ac1e59fafb62d5cf0a2b 100644 (file)
@@ -768,64 +768,58 @@ void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_x, int x_step_q4,
                                    const int16_t *filter_y, int y_step_q4,
                                    int w, int h) {
-  if (16 == x_step_q4) {
-    uint32_t pos = 38;
-
-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
-
-    /* prefetch data to cache memory */
-    prefetch_load(src);
-    prefetch_load(src + 32);
-    prefetch_store(dst);
-
-    switch (w) {
-      case 4:
-        convolve_bi_avg_horiz_4_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-        break;
-      case 8:
-        convolve_bi_avg_horiz_8_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-        break;
-      case 16:
-        convolve_bi_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 1);
-        break;
-      case 32:
-        convolve_bi_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 2);
-        break;
-      case 64:
-        prefetch_load(src + 64);
-        prefetch_store(dst + 32);
-
-        convolve_bi_avg_horiz_64_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h);
-        break;
-      default:
-        vpx_convolve8_avg_horiz_c(src, src_stride,
-                                  dst, dst_stride,
-                                  filter_x, x_step_q4,
-                                  filter_y, y_step_q4,
-                                  w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_avg_horiz_c(src, src_stride,
-                              dst, dst_stride,
-                              filter_x, x_step_q4,
-                              filter_y, y_step_q4,
-                              w, h);
+  uint32_t pos = 38;
+
+  assert(x_step_q4 == 16);
+
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );
+
+  /* prefetch data to cache memory */
+  prefetch_load(src);
+  prefetch_load(src + 32);
+  prefetch_store(dst);
+
+  switch (w) {
+    case 4:
+      convolve_bi_avg_horiz_4_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+      break;
+    case 8:
+      convolve_bi_avg_horiz_8_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+      break;
+    case 16:
+      convolve_bi_avg_horiz_16_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h, 1);
+      break;
+    case 32:
+      convolve_bi_avg_horiz_16_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h, 2);
+      break;
+    case 64:
+      prefetch_load(src + 64);
+      prefetch_store(dst + 32);
+
+      convolve_bi_avg_horiz_64_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h);
+      break;
+    default:
+      vpx_convolve8_avg_horiz_c(src, src_stride,
+                                dst, dst_stride,
+                                filter_x, x_step_q4,
+                                filter_y, y_step_q4,
+                                w, h);
+      break;
   }
 }
 #endif
index 5c143c506bf30155638001173093870c067c3d28..9fe1a3454b7e43fbdb0bf6175767eecd9b60e0c7 100644 (file)
@@ -646,66 +646,60 @@ void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
                                int w, int h) {
-  if (16 == x_step_q4) {
-    uint32_t pos = 38;
-
-    prefetch_load((const uint8_t *)filter_x);
-
-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
-
-    /* prefetch data to cache memory */
-    prefetch_load(src);
-    prefetch_load(src + 32);
-    prefetch_store(dst);
-
-    switch (w) {
-      case 4:
-        convolve_bi_horiz_4_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h);
-        break;
-      case 8:
-        convolve_bi_horiz_8_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h);
-        break;
-      case 16:
-        convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
-                                   dst, (int32_t)dst_stride,
-                                   filter_x, (int32_t)h, 1);
-        break;
-      case 32:
-        convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
-                                   dst, (int32_t)dst_stride,
-                                   filter_x, (int32_t)h, 2);
-        break;
-      case 64:
-        prefetch_load(src + 64);
-        prefetch_store(dst + 32);
-
-        convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
-                                   dst, (int32_t)dst_stride,
-                                   filter_x, (int32_t)h);
-        break;
-      default:
-        vpx_convolve8_horiz_c(src, src_stride,
-                              dst, dst_stride,
-                              filter_x, x_step_q4,
-                              filter_y, y_step_q4,
-                              w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_horiz_c(src, src_stride,
-                          dst, dst_stride,
-                          filter_x, x_step_q4,
-                          filter_y, y_step_q4,
-                          w, h);
+  uint32_t pos = 38;
+
+  assert(x_step_q4 == 16);
+
+  prefetch_load((const uint8_t *)filter_x);
+
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );
+
+  /* prefetch data to cache memory */
+  prefetch_load(src);
+  prefetch_load(src + 32);
+  prefetch_store(dst);
+
+  switch (w) {
+    case 4:
+      convolve_bi_horiz_4_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h);
+      break;
+    case 8:
+      convolve_bi_horiz_8_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h);
+      break;
+    case 16:
+      convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filter_x, (int32_t)h, 1);
+      break;
+    case 32:
+      convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filter_x, (int32_t)h, 2);
+      break;
+    case 64:
+      prefetch_load(src + 64);
+      prefetch_store(dst + 32);
+
+      convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filter_x, (int32_t)h);
+      break;
+    default:
+      vpx_convolve8_horiz_c(src, src_stride,
+                            dst, dst_stride,
+                            filter_x, x_step_q4,
+                            filter_y, y_step_q4,
+                            w, h);
+      break;
   }
 }
 #endif
index 275bb5354924a3ce1dcde5ccf2e6af4aba845cc1..dde6ffd54f8182bad34f3128079da4d94f87a0b4 100644 (file)
@@ -218,47 +218,41 @@ void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
-  if (16 == y_step_q4) {
-    uint32_t pos = 38;
-
-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
-
-    prefetch_store(dst);
-
-    switch (w) {
-      case 4 :
-      case 8 :
-      case 16 :
-      case 32 :
-        convolve_bi_vert_4_dspr2(src, src_stride,
-                                 dst, dst_stride,
-                                 filter_y, w, h);
-        break;
-      case 64 :
-        prefetch_store(dst + 32);
-        convolve_bi_vert_64_dspr2(src, src_stride,
-                                  dst, dst_stride,
-                                  filter_y, h);
-        break;
-      default:
-        vpx_convolve8_vert_c(src, src_stride,
-                             dst, dst_stride,
-                             filter_x, x_step_q4,
-                             filter_y, y_step_q4,
-                             w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_vert_c(src, src_stride,
-                         dst, dst_stride,
-                         filter_x, x_step_q4,
-                         filter_y, y_step_q4,
-                         w, h);
+  uint32_t pos = 38;
+
+  assert(y_step_q4 == 16);
+
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );
+
+  prefetch_store(dst);
+
+  switch (w) {
+    case 4 :
+    case 8 :
+    case 16 :
+    case 32 :
+      convolve_bi_vert_4_dspr2(src, src_stride,
+                               dst, dst_stride,
+                               filter_y, w, h);
+      break;
+    case 64 :
+      prefetch_store(dst + 32);
+      convolve_bi_vert_64_dspr2(src, src_stride,
+                                dst, dst_stride,
+                                filter_y, h);
+      break;
+    default:
+      vpx_convolve8_vert_c(src, src_stride,
+                           dst, dst_stride,
+                           filter_x, x_step_q4,
+                           filter_y, y_step_q4,
+                           w, h);
+      break;
   }
 }
 #endif
index dad543dbb11b0687019f46faeac95b84811fe6cc..b8fc0a7d3cc93965372de6cd14e62d905456b99a 100644 (file)
@@ -347,6 +347,7 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
+  assert(y_step_q4 == 16);
   if (((const int32_t *)filter_y)[1] == 0x800000) {
     vpx_convolve_avg(src, src_stride,
                      dst, dst_stride,
@@ -360,47 +361,39 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                  filter_y, y_step_q4,
                                  w, h);
   } else {
-    if (16 == y_step_q4) {
-      uint32_t pos = 38;
-
-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
-
-      prefetch_store(dst);
-
-      switch (w) {
-        case 4:
-        case 8:
-        case 16:
-        case 32:
-          convolve_avg_vert_4_dspr2(src, src_stride,
-                                    dst, dst_stride,
-                                    filter_y, w, h);
-          break;
-        case 64:
-          prefetch_store(dst + 32);
-          convolve_avg_vert_64_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_y, h);
-          break;
-        default:
-          vpx_convolve8_avg_vert_c(src, src_stride,
+    uint32_t pos = 38;
+
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );
+
+    prefetch_store(dst);
+
+    switch (w) {
+      case 4:
+      case 8:
+      case 16:
+      case 32:
+        convolve_avg_vert_4_dspr2(src, src_stride,
+                                  dst, dst_stride,
+                                  filter_y, w, h);
+        break;
+      case 64:
+        prefetch_store(dst + 32);
+        convolve_avg_vert_64_dspr2(src, src_stride,
                                    dst, dst_stride,
-                                   filter_x, x_step_q4,
-                                   filter_y, y_step_q4,
-                                   w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_avg_vert_c(src, src_stride,
-                               dst, dst_stride,
-                               filter_x, x_step_q4,
-                               filter_y, y_step_q4,
-                               w, h);
+                                   filter_y, h);
+        break;
+      default:
+        vpx_convolve8_avg_vert_c(src, src_stride,
+                                 dst, dst_stride,
+                                 filter_x, x_step_q4,
+                                 filter_y, y_step_q4,
+                                 w, h);
+        break;
     }
   }
 }
@@ -416,17 +409,12 @@ void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
 
   assert(w <= 64);
   assert(h <= 64);
+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);
 
   if (intermediate_height < h)
     intermediate_height = h;
 
-  if (x_step_q4 != 16 || y_step_q4 != 16)
-    return vpx_convolve8_avg_c(src, src_stride,
-                               dst, dst_stride,
-                               filter_x, x_step_q4,
-                               filter_y, y_step_q4,
-                               w, h);
-
   vpx_convolve8_horiz(src - (src_stride * 3), src_stride,
                       temp, 64,
                       filter_x, x_step_q4,
index 5ad075f940bd5bc79cff8bcacdc65341f5f46232..5d7c8f03bdb502d30f5e7efbf00e5695cccd5353 100644 (file)
@@ -957,6 +957,7 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_x, int x_step_q4,
                                    const int16_t *filter_y, int y_step_q4,
                                    int w, int h) {
+  assert(x_step_q4 == 16);
   if (((const int32_t *)filter_x)[1] == 0x800000) {
     vpx_convolve_avg(src, src_stride,
                      dst, dst_stride,
@@ -970,66 +971,58 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   filter_y, y_step_q4,
                                   w, h);
   } else {
-    if (16 == x_step_q4) {
-      uint32_t pos = 38;
+    uint32_t pos = 38;
 
-      src -= 3;
+    src -= 3;
 
-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );
+
+    /* prefetch data to cache memory */
+    prefetch_load(src);
+    prefetch_load(src + 32);
+    prefetch_store(dst);
+
+    switch (w) {
+      case 4:
+        convolve_avg_horiz_4_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+        break;
+      case 8:
+        convolve_avg_horiz_8_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+        break;
+      case 16:
+        convolve_avg_horiz_16_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h, 1);
+        break;
+      case 32:
+        convolve_avg_horiz_16_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h, 2);
+        break;
+      case 64:
+        prefetch_load(src + 64);
+        prefetch_store(dst + 32);
 
-      /* prefetch data to cache memory */
-      prefetch_load(src);
-      prefetch_load(src + 32);
-      prefetch_store(dst);
-
-      switch (w) {
-        case 4:
-          convolve_avg_horiz_4_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-          break;
-        case 8:
-          convolve_avg_horiz_8_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-          break;
-        case 16:
-          convolve_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 1);
-          break;
-        case 32:
-          convolve_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 2);
-          break;
-        case 64:
-          prefetch_load(src + 64);
-          prefetch_store(dst + 32);
-
-          convolve_avg_horiz_64_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h);
-          break;
-        default:
-          vpx_convolve8_avg_horiz_c(src + 3, src_stride,
+        convolve_avg_horiz_64_dspr2(src, src_stride,
                                     dst, dst_stride,
-                                    filter_x, x_step_q4,
-                                    filter_y, y_step_q4,
-                                    w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_avg_horiz_c(src, src_stride,
-                                dst, dst_stride,
-                                filter_x, x_step_q4,
-                                filter_y, y_step_q4,
-                                w, h);
+                                    filter_x, h);
+        break;
+      default:
+        vpx_convolve8_avg_horiz_c(src + 3, src_stride,
+                                  dst, dst_stride,
+                                  filter_x, x_step_q4,
+                                  filter_y, y_step_q4,
+                                  w, h);
+        break;
     }
   }
 }
index 398b85a9fb9c8dd66f5772af5a1d3e695021535d..bab13a1326ed06c047bcc3967157a5e8187ef4c7 100644 (file)
@@ -936,6 +936,9 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride,
   int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7;
   uint32_t pos = 38;
 
+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);
+
   /* bit positon for extract from acc */
   __asm__ __volatile__ (
     "wrdsp      %[pos],     1           \n\t"
@@ -946,13 +949,6 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride,
   if (intermediate_height < h)
     intermediate_height = h;
 
-  if (x_step_q4 != 16 || y_step_q4 != 16)
-    return vpx_convolve8_c(src, src_stride,
-                           dst, dst_stride,
-                           filter_x, x_step_q4,
-                           filter_y, y_step_q4,
-                           w, h);
-
   if ((((const int32_t *)filter_x)[1] == 0x800000)
       && (((const int32_t *)filter_y)[1] == 0x800000))
     return vpx_convolve_copy(src, src_stride,
index c71f93f24fcfe8d6057c7844ffe5a40517a19869..2e51fd5c8bc588db5e73a5e359cddb8e73630445 100644 (file)
@@ -841,6 +841,7 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
                                int w, int h) {
+  assert(x_step_q4 == 16);
   if (((const int32_t *)filter_x)[1] == 0x800000) {
     vpx_convolve_copy(src, src_stride,
                       dst, dst_stride,
@@ -854,67 +855,59 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               filter_y, y_step_q4,
                               w, h);
   } else {
-    if (16 == x_step_q4) {
-      uint32_t pos = 38;
+    uint32_t pos = 38;
 
-      prefetch_load((const uint8_t *)filter_x);
-      src -= 3;
+    prefetch_load((const uint8_t *)filter_x);
+    src -= 3;
 
-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );
 
-      /* prefetch data to cache memory */
-      prefetch_load(src);
-      prefetch_load(src + 32);
-      prefetch_store(dst);
-
-      switch (w) {
-        case 4:
-          convolve_horiz_4_dspr2(src, (int32_t)src_stride,
-                                 dst, (int32_t)dst_stride,
-                                 filter_x, (int32_t)h);
-          break;
-        case 8:
-          convolve_horiz_8_dspr2(src, (int32_t)src_stride,
-                                 dst, (int32_t)dst_stride,
-                                 filter_x, (int32_t)h);
-          break;
-        case 16:
-          convolve_horiz_16_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h, 1);
-          break;
-        case 32:
-          convolve_horiz_16_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h, 2);
-          break;
-        case 64:
-          prefetch_load(src + 64);
-          prefetch_store(dst + 32);
-
-          convolve_horiz_64_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h);
-          break;
-        default:
-          vpx_convolve8_horiz_c(src + 3, src_stride,
-                                dst, dst_stride,
-                                filter_x, x_step_q4,
-                                filter_y, y_step_q4,
-                                w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_horiz_c(src, src_stride,
-                            dst, dst_stride,
-                            filter_x, x_step_q4,
-                            filter_y, y_step_q4,
-                            w, h);
+    /* prefetch data to cache memory */
+    prefetch_load(src);
+    prefetch_load(src + 32);
+    prefetch_store(dst);
+
+    switch (w) {
+      case 4:
+        convolve_horiz_4_dspr2(src, (int32_t)src_stride,
+                               dst, (int32_t)dst_stride,
+                               filter_x, (int32_t)h);
+        break;
+      case 8:
+        convolve_horiz_8_dspr2(src, (int32_t)src_stride,
+                               dst, (int32_t)dst_stride,
+                               filter_x, (int32_t)h);
+        break;
+      case 16:
+        convolve_horiz_16_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h, 1);
+        break;
+      case 32:
+        convolve_horiz_16_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h, 2);
+        break;
+      case 64:
+        prefetch_load(src + 64);
+        prefetch_store(dst + 32);
+
+        convolve_horiz_64_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h);
+        break;
+      default:
+        vpx_convolve8_horiz_c(src + 3, src_stride,
+                              dst, dst_stride,
+                              filter_x, x_step_q4,
+                              filter_y, y_step_q4,
+                              w, h);
+        break;
     }
   }
 }
index 6922387b5b80ab8350908bebf74fb144f8f8d255..05d088c1694d0997076c85e580f45e5e6bc3d02e 100644 (file)
@@ -333,6 +333,7 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
+  assert(y_step_q4 == 16);
   if (((const int32_t *)filter_y)[1] == 0x800000) {
     vpx_convolve_copy(src, src_stride,
                       dst, dst_stride,
@@ -346,47 +347,39 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                              filter_y, y_step_q4,
                              w, h);
   } else {
-    if (16 == y_step_q4) {
-      uint32_t pos = 38;
-
-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
-
-      prefetch_store(dst);
-
-      switch (w) {
-        case 4 :
-        case 8 :
-        case 16 :
-        case 32 :
-          convolve_vert_4_dspr2(src, src_stride,
-                                dst, dst_stride,
-                                filter_y, w, h);
-          break;
-        case 64 :
-          prefetch_store(dst + 32);
-          convolve_vert_64_dspr2(src, src_stride,
-                                 dst, dst_stride,
-                                 filter_y, h);
-          break;
-        default:
-          vpx_convolve8_vert_c(src, src_stride,
+    uint32_t pos = 38;
+
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );
+
+    prefetch_store(dst);
+
+    switch (w) {
+      case 4 :
+      case 8 :
+      case 16 :
+      case 32 :
+        convolve_vert_4_dspr2(src, src_stride,
+                              dst, dst_stride,
+                              filter_y, w, h);
+        break;
+      case 64 :
+        prefetch_store(dst + 32);
+        convolve_vert_64_dspr2(src, src_stride,
                                dst, dst_stride,
-                               filter_x, x_step_q4,
-                               filter_y, y_step_q4,
-                               w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_vert_c(src, src_stride,
-                           dst, dst_stride,
-                           filter_x, x_step_q4,
-                           filter_y, y_step_q4,
-                           w, h);
+                               filter_y, h);
+        break;
+      default:
+        vpx_convolve8_vert_c(src, src_stride,
+                             dst, dst_stride,
+                             filter_x, x_step_q4,
+                             filter_y, y_step_q4,
+                             w, h);
+        break;
     }
   }
 }
index a3dca326e24090a734677649dd3d16c3f2ef6fdf..75ec3df771471eb466482e36d7ef1b8d2fa0570d 100644 (file)
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"
 
@@ -665,12 +666,7 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                  int w, int h) {
   int8_t cnt, filt_hor[8];
 
-  if (16 != x_step_q4) {
-    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);
 
   if (((const int32_t *)filter_x)[1] == 0x800000) {
     vpx_convolve_avg(src, src_stride, dst, dst_stride,
index b4e023d21d9dd371d3b0b425506aa3d0c272140f..9e96803d5fda966f7918efb836bcadfa4d447c8d 100644 (file)
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"
 
@@ -574,12 +575,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                            int w, int h) {
   int8_t cnt, filt_hor[8], filt_ver[8];
 
-  if (16 != x_step_q4 || 16 != y_step_q4) {
-    vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
-                        filter_x, x_step_q4, filter_y, y_step_q4,
-                        w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);
 
   if (((const int32_t *)filter_x)[1] == 0x800000 &&
       ((const int32_t *)filter_y)[1] == 0x800000) {
index ba40b052f9057f72854f94b674d1b0d3c0029ae8..4e1d180b6247d35615924ac16ee5d977889b1218 100644 (file)
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"
 
@@ -639,12 +640,7 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                 int w, int h) {
   int8_t cnt, filt_ver[8];
 
-  if (16 != y_step_q4) {
-    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
-                             filter_x, x_step_q4, filter_y, y_step_q4,
-                             w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);
 
   if (((const int32_t *)filter_y)[1] == 0x800000) {
     vpx_convolve_avg(src, src_stride, dst, dst_stride,
index 1fdc20fec144a23e810282c107047e91101633f8..57df804dc89a414cf695f4b70eef6a89701bd24a 100644 (file)
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"
 
@@ -625,12 +626,7 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                              int w, int h) {
   int8_t cnt, filt_hor[8];
 
-  if (16 != x_step_q4) {
-    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
-                          filter_x, x_step_q4, filter_y, y_step_q4,
-                          w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);
 
   if (((const int32_t *)filter_x)[1] == 0x800000) {
     vpx_convolve_copy(src, src_stride, dst, dst_stride,
index 7aeb1704a704b29403a8783c2a735f6ac497bd52..355cc332f129262aa51ba561e422d3f50f9d7904 100644 (file)
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"
 
@@ -548,12 +549,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                        int32_t w, int32_t h) {
   int8_t cnt, filt_hor[8], filt_ver[8];
 
-  if (16 != x_step_q4 || 16 != y_step_q4) {
-    vpx_convolve8_c(src, src_stride, dst, dst_stride,
-                    filter_x, x_step_q4, filter_y, y_step_q4,
-                    w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);
 
   if (((const int32_t *)filter_x)[1] == 0x800000 &&
       ((const int32_t *)filter_y)[1] == 0x800000) {
index 4d47bccf8c90e75fc18dcc0758d27adeb14a53ec..f166005833acdce54ef4eed2937579e9c3aba384 100644 (file)
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"
 
@@ -632,12 +633,7 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                             int w, int h) {
   int8_t cnt, filt_ver[8];
 
-  if (16 != y_step_q4) {
-    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
-                         filter_x, x_step_q4, filter_y, y_step_q4,
-                         w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);
 
   if (((const int32_t *)filter_y)[1] == 0x800000) {
     vpx_convolve_copy(src, src_stride, dst, dst_stride,