  return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
}
+/// \brief Right-shift a vector by a constant.
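+/// Both lshr and ashr produce an undefined result when the shift amount
+/// equals the element width, but the NEON intrinsics accept an immediate of
+/// exactly that size: vshr_n_u8(x, 8) must yield zero, and vshr_n_s8(x, 8)
+/// must replicate the sign bit (equivalent to a shift by 7).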
+Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
+                                          llvm::Type *Ty, bool usgn,
+                                          const char *name) {
+  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+
+  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
+  int EltSize = VTy->getScalarSizeInBits();
+
+  Vec = Builder.CreateBitCast(Vec, Ty);
+
+  // lshr/ashr are undefined when the shift amount is equal to the vector
+  // element size.
+  if (ShiftAmt == EltSize) {
+    if (usgn) {
+      // Right-shifting an unsigned value by its size yields 0.
+      return llvm::ConstantAggregateZero::get(VTy);
+    } else {
+      // Right-shifting a signed value by its size is equivalent
+      // to a shift of size-1.
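+      // (An arithmetic shift by EltSize-1 propagates the sign bit through
+      // the whole lane, so every lane becomes 0 or -1 as NEON requires.)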
+      --ShiftAmt;
+      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
+    }
+  }
+
+  Shift = EmitNeonShiftVector(Shift, Ty, false);
+  if (usgn)
+    return Builder.CreateLShr(Vec, Shift, name);
+  else
+    return Builder.CreateAShr(Vec, Shift, name);
+}
+
/// GetPointeeAlignment - Given an expression with a pointer type, find the
/// alignment of the type referenced by the pointer. Skip over implicit
/// casts.
Ops, "vshrn_n", 1, true);
case ARM::BI__builtin_neon_vshr_n_v:
case ARM::BI__builtin_neon_vshrq_n_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
- if (usgn)
- return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
- else
- return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
+ return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n");
case ARM::BI__builtin_neon_vsri_n_v:
case ARM::BI__builtin_neon_vsriq_n_v:
rightShift = true;
case ARM::BI__builtin_neon_vsra_n_v:
case ARM::BI__builtin_neon_vsraq_n_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
- if (usgn)
- Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
- else
- Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
+ Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
return Builder.CreateAdd(Ops[0], Ops[1]);
case ARM::BI__builtin_neon_vst1_v:
case ARM::BI__builtin_neon_vst1q_v:
  llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
  llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty,
                                   bool negateForRightShift);
+  llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
+                                 llvm::Type *Ty, bool usgn, const char *name);
  llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
  llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
--- /dev/null
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -emit-llvm -O1 -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+uint8x8_t test_shift_vshr(uint8x8_t a) {
+  // CHECK-LABEL: test_shift_vshr
+  // CHECK: %vshr_n = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  return vshr_n_u8(a, 5);
+}
+
+int8x8_t test_shift_vshr_smax(int8x8_t a) {
+  // CHECK-LABEL: test_shift_vshr_smax
+  // CHECK: %vshr_n = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  return vshr_n_s8(a, 8);
+}
+
+uint8x8_t test_shift_vshr_umax(uint8x8_t a) {
+  // CHECK-LABEL: test_shift_vshr_umax
+  // CHECK: ret <8 x i8> zeroinitializer
+  return vshr_n_u8(a, 8);
+}
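+
+// An extra signed, non-boundary case (an illustrative addition, not part of
+// the original patch; the CHECK pattern is assumed to match the cases above):
+int8x8_t test_shift_vshr_signed(int8x8_t a) {
+  // CHECK-LABEL: test_shift_vshr_signed
+  // CHECK: %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  return vshr_n_s8(a, 3);
+}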
+
+uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {
+  // CHECK-LABEL: test_shift_vsra
+  // CHECK: %vsra_n = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  // CHECK: %0 = add <8 x i8> %vsra_n, %a
+  return vsra_n_u8(a, b, 5);
+}
+
+int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) {
+  // CHECK-LABEL: test_shift_vsra_smax
+  // CHECK: %vsra_n = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  // CHECK: %0 = add <8 x i8> %vsra_n, %a
+  return vsra_n_s8(a, b, 8);
+}
+
+uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {
+  // CHECK-LABEL: test_shift_vsra_umax
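+  // The unsigned shift by the full element width folds to zero, so at -O1
+  // the accumulate reduces to returning the addend unchanged: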
+  // CHECK: ret <8 x i8> %a
+  return vsra_n_u8(a, b, 8);
+}
--- /dev/null
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple thumbv7-apple-darwin \
+// RUN:   -target-cpu cortex-a8 \
+// RUN:   -ffreestanding \
+// RUN:   -emit-llvm -w -O1 -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+uint8x8_t test_shift_vshr(uint8x8_t a) {
+  // CHECK-LABEL: test_shift_vshr
+  // CHECK: %vshr_n = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  return vshr_n_u8(a, 5);
+}
+
+int8x8_t test_shift_vshr_smax(int8x8_t a) {
+  // CHECK-LABEL: test_shift_vshr_smax
+  // CHECK: %vshr_n = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  return vshr_n_s8(a, 8);
+}
+
+uint8x8_t test_shift_vshr_umax(uint8x8_t a) {
+  // CHECK-LABEL: test_shift_vshr_umax
+  // CHECK: ret <8 x i8> zeroinitializer
+  return vshr_n_u8(a, 8);
+}
+
+uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {
+  // CHECK-LABEL: test_shift_vsra
+  // CHECK: %vsra_n = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  // CHECK: %0 = add <8 x i8> %vsra_n, %a
+  return vsra_n_u8(a, b, 5);
+}
+
+int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) {
+  // CHECK-LABEL: test_shift_vsra_smax
+  // CHECK: %vsra_n = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  // CHECK: %0 = add <8 x i8> %vsra_n, %a
+  return vsra_n_s8(a, b, 8);
+}
+
+uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {
+  // CHECK-LABEL: test_shift_vsra_umax
+  // CHECK: ret <8 x i8> %a
+  return vsra_n_u8(a, b, 8);
+}