[AArch64] Implement lowering of the X constraint on AArch64

author Silviu Baranga <silviu.baranga@arm.com>

Mon, 9 May 2016 11:10:44 +0000 (11:10 +0000)

committer Silviu Baranga <silviu.baranga@arm.com>

Mon, 9 May 2016 11:10:44 +0000 (11:10 +0000)
author Silviu Baranga <silviu.baranga@arm.com>
Mon, 9 May 2016 11:10:44 +0000 (11:10 +0000)
committer Silviu Baranga <silviu.baranga@arm.com>
Mon, 9 May 2016 11:10:44 +0000 (11:10 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 575f9d9fa5b159e1c5505ae7da114f27f3889049..5b4516b36fbc4908483015007fa8a867ee58ac0d 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4687,6 +4687,27 @@ SDValue AArch64TargetLowering::getRsqrtEstimate(SDValue Operand,
  // is prefixed by the %w modifier. Floating-point and SIMD register operands
  // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
  // %q modifier.
+const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+  // Lower the inline-asm "X" constraint to a concrete one, chosen from the
+  // operand type: "r" or "w". Doing this forces the operand into a register,
+  // even though the X constraint itself is much more permissive.
+  //
+  // This is still correct (X leaves us free to emit anything), but it might
+  // break use cases that expect us to be more efficient and emit something
+  // other than a register.
+  if (!Subtarget->hasFPARMv8())
+    return "r"; // Without hasFPARMv8(), every type is lowered to "r".
+
+  if (ConstraintVT.isFloatingPoint())
+    return "w";
+
+  if (ConstraintVT.isVector() &&
+     (ConstraintVT.getSizeInBits() == 64 ||
+      ConstraintVT.getSizeInBits() == 128))
+    return "w"; // Only 64-bit and 128-bit vectors are lowered to "w".
+
+  return "r"; // Everything else, including other vector sizes.
+}
  
  /// getConstraintType - Given a constraint letter, return the type of
  /// constraint it is for this target.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h

index 65e2614461de26ba88786d041c45bdfe25dd7215..ea2b8c1904f6ead3dc40a7007ffd1a8776ba9939 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -535,6 +535,9 @@ private:
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;
+
+  const char *LowerXConstraint(EVT ConstraintVT) const override;
+
    void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;
diff --git a/test/CodeGen/AArch64/inlineasm-X-allocation.ll b/test/CodeGen/AArch64/inlineasm-X-allocation.ll

new file mode 100644 (file)

index 0000000..1d7a24e
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-X-allocation.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=-fp-armv8 %s -o - | FileCheck %s  -check-prefix=nofp
+
+; In the nofp case (-mattr=-fp-armv8), the compiler is forced to assign a
+; core register, even if the input is a float.
+
+; nofp-LABEL: f1
+; nofp-CHECK: ldr x0, [sp]
+
+; This can be generated by a function such as:
+;  void f1(float f) {asm volatile ("ldr $0, [sp]" : : "X" (f));}
+
+define void @f1(float %f) {
+entry:
+  call void asm sideeffect "ldr $0, [sp]", "X" (float %f) nounwind
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/inlineasm-X-constraint.ll b/test/CodeGen/AArch64/inlineasm-X-constraint.ll

new file mode 100644 (file)

index 0000000..77652cc
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-X-constraint.ll
@@ -0,0 +1,152 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s
+
+; The following functions test the use case where an X constraint is used to
+; add a dependency between an assembly instruction (msr in this case) and
+; another instruction. In each function, we use a different type for the
+; X constraint argument.
+;
+; We can get something similar from the following C code:
+; double f1(double f, int pscr_value) {
+;   asm volatile("msr fpsr,%1" : "=X" ((f)): "r" (pscr_value));
+;   return f+f;
+; }
+
+; CHECK-LABEL: f1
+; CHECK: msr FPSR
+; CHECK: fadd d
+
+define  double @f1(double %f, i32 %pscr_value) {
+entry:
+  %f.addr = alloca double, align 8
+  store double %f, double* %f.addr, align 8
+  call void asm sideeffect "msr fpsr,$1", "=*X,r"(double* nonnull %f.addr, i32 %pscr_value) nounwind
+  %0 = load double, double* %f.addr, align 8
+  %add = fadd double %0, %0
+  ret double %add
+}
+
+; int f2(int f, int pscr_value) {
+;   asm volatile("msr fpsr,%1" : "=X" ((f)): "r" (pscr_value));
+;   return f*f;
+; }
+
+; CHECK-LABEL: f2
+; CHECK: msr FPSR
+; CHECK: mul
+define  i32 @f2(i32 %f, i32 %pscr_value) {
+entry:
+  %f.addr = alloca i32, align 4
+  store i32 %f, i32* %f.addr, align 4
+  call void asm sideeffect "msr fpsr,$1", "=*X,r"(i32* nonnull %f.addr, i32 %pscr_value) nounwind
+  %0 = load i32, i32* %f.addr, align 4
+  %mul = mul i32 %0, %0
+  ret i32 %mul
+}
+
+; typedef signed char int8_t;
+; typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+; int8x8_t f3 (void)
+; {
+;   int8x8_t vector_res_int8x8;
+;   unsigned int fpscr;
+;   asm volatile ("msr fpsr,%1" : "=X" ((vector_res_int8x8)) : "r" (fpscr));
+;   return vector_res_int8x8 * vector_res_int8x8;
+; }
+
+; CHECK-LABEL: f3
+; CHECK: msr FPSR
+; CHECK: mul
+define  <8 x i8> @f3() {
+entry:
+  %vector_res_int8x8 = alloca <8 x i8>, align 8
+  %0 = getelementptr inbounds <8 x i8>, <8 x i8>* %vector_res_int8x8, i32 0, i32 0
+  call void asm sideeffect "msr fpsr,$1", "=*X,r"(<8 x i8>* nonnull %vector_res_int8x8, i32 undef) nounwind
+  %1 = load <8 x i8>, <8 x i8>* %vector_res_int8x8, align 8
+  %mul = mul <8 x i8> %1, %1
+  ret <8 x i8> %mul
+}
+
+; We can emit integer constants.
+; We can get this from:
+; void f() {
+;   int x = 2;
+;   asm volatile ("add x0, x0, %0" : : "X" (x));
+; }
+;
+; CHECK-LABEL: f4
+; CHECK: add x0, x0, #2
+define void @f4() {
+entry:
+  tail call void asm sideeffect "add x0, x0, $0", "X"(i32 2)
+  ret void
+}
+
+; We can emit function labels. This is equivalent to the following C code:
+; void f(void) {
+;   void (*x)(void) = &foo;
+;   asm volatile ("bl %0" : : "X" (x));
+; }
+; CHECK-LABEL: f5
+; CHECK: bl f4
+define void @f5() {
+entry:
+  tail call void asm sideeffect "bl $0", "X"(void ()* nonnull @f4)
+  ret void
+}
+
+declare void @foo(...)
+
+; This tests the behavior of the X constraint when used on function pointers,
+; or functions with a cast. In the first asm call we figure out that this
+; is a function pointer and emit the label. However, in the second asm call
+; we can't see through the bitcast and we end up having to lower this constraint
+; to something else. This is not ideal, but it is a correct behaviour according
+; to the definition of the X constraint.
+;
+; In this case (and other cases where we could have emitted something else),
+; what we're doing with the X constraint is not particularly useful either,
+; since the user could have used "r" in this situation for the same effect.
+
+; CHECK-LABEL: f6
+; CHECK: bl foo
+; CHECK: br x
+
+define void @f6() nounwind {
+entry:
+  tail call void asm sideeffect "bl $0", "X"(void (...)* @foo) nounwind
+  tail call void asm sideeffect "br $0", "X"(void (...)* bitcast (void ()* @f4 to void (...)*)) nounwind
+  ret void
+}
+
+; The following IR can be generated from C code with a function like:
+; void a() {
+;   void* a = &&A;
+;   asm volatile ("bl %0" : : "X" (a));
+;  A:
+;   return;
+; }
+;
+; Ideally this would give the block address of bb, but it requires us to see
+; through blockaddress, which we can't do at the moment. This might break some
+; existing use cases where a user would expect to get a block label and instead
+; gets the block address in a register. However, note that according to the
+; "no constraints" definition this behaviour is correct (although not very nice).
+
+; CHECK-LABEL: f7
+; CHECK: bl
+define void @f7() {
+  call void asm sideeffect "br $0", "X"( i8* blockaddress(@f7, %bb) )
+  br label %bb
+bb:
+  ret void
+}
+
+; If we use a constraint "=*X", we should get a store back to *%x (in x0).
+; CHECK-LABEL: f8
+; CHECK: add    [[Dest:x[0-9]+]], x0, x0
+; CHECK: str   [[Dest]], [x0]
+define void @f8(i64 *%x) {
+entry:
+  tail call void asm sideeffect "add $0, x0, x0", "=*X"(i64 *%x)
+  ret void
+}
author	Silviu Baranga <silviu.baranga@arm.com>
	Mon, 9 May 2016 11:10:44 +0000 (11:10 +0000)
committer	Silviu Baranga <silviu.baranga@arm.com>
	Mon, 9 May 2016 11:10:44 +0000 (11:10 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.h		patch \| blob \| history
test/CodeGen/AArch64/inlineasm-X-allocation.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/AArch64/inlineasm-X-constraint.ll	[new file with mode: 0644]	patch \| blob