addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
}
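+ // A packed v2i16 or v2f16 fits in a single 32-bit register, so the packed
+ // vector types can reuse the 32-bit scalar register class.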
+ if (Subtarget->hasVOP3PInsts()) {
+ addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass);
+ addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass);
+ }
+
computeRegisterProperties(STI.getRegisterInfo());
// We need to custom lower vector stores from local memory
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) {
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
+ MVT::v2i64, MVT::v2f64}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
setOperationAction(ISD::FMAD, MVT::f16, Legal);
}
+ if (Subtarget->hasVOP3PInsts()) {
+ for (MVT VT : {MVT::v2i16, MVT::v2f16}) {
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
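+ // Leave the basic load/store and vector manipulation ops with their
+ // existing actions, custom lower CONCAT_VECTORS, and expand the rest.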
+ switch (Op) {
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::BUILD_VECTOR:
+ case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ break;
+ case ISD::CONCAT_VECTORS:
+ setOperationAction(Op, VT, Custom);
+ break;
+ default:
+ setOperationAction(Op, VT, Expand);
+ break;
+ }
+ }
+ }
+
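+ // A v2i16/v2f16 load or store is the same size as an i32 access, so
+ // promote these to i32 and reuse the existing scalar handling.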
+ setOperationAction(ISD::STORE, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::STORE, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2f16, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::LOAD, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2f16, MVT::i32);
+ }
+
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+
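+; Check that packed 16-bit vector types can be used as 32-bit inline asm
+; operands under both SGPR and VGPR constraints.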
+; GCN-LABEL: {{^}}inline_asm_input_v2i16:
+; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
+define void @inline_asm_input_v2i16(i32 addrspace(1)* %out, <2 x i16> %in) #0 {
+entry:
+ %val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x i16> %in) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}inline_asm_input_v2f16:
+; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
+define void @inline_asm_input_v2f16(i32 addrspace(1)* %out, <2 x half> %in) #0 {
+entry:
+ %val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x half> %in) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}inline_asm_output_v2i16:
+; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
+define void @inline_asm_output_v2i16(<2 x i16> addrspace(1)* %out, i32 %in) #0 {
+entry:
+ %val = call <2 x i16> asm "s_mov_b32 $0, $1", "=r,r"(i32 %in) #0
+ store <2 x i16> %val, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}inline_asm_output_v2f16:
+; GCN: v_mov_b32 v{{[0-9]+}}, s{{[0-9]+}}
+define void @inline_asm_output_v2f16(<2 x half> addrspace(1)* %out, i32 %in) #0 {
+entry:
+ %val = call <2 x half> asm "v_mov_b32 $0, $1", "=v,r"(i32 %in) #0
+ store <2 x half> %val, <2 x half> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}inline_asm_packed_v2i16:
+; GCN: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+define void @inline_asm_packed_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %in0, <2 x i16> %in1) #0 {
+entry:
+ %val = call <2 x i16> asm "v_pk_add_u16 $0, $1, $2", "=v,r,v"(<2 x i16> %in0, <2 x i16> %in1) #0
+ store <2 x i16> %val, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}inline_asm_packed_v2f16:
+; GCN: v_pk_add_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+define void @inline_asm_packed_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in0, <2 x half> %in1) #0 {
+entry:
+ %val = call <2 x half> asm "v_pk_add_f16 $0, $1, $2", "=v,r,v"(<2 x half> %in0, <2 x half> %in1) #0
+ store <2 x half> %val, <2 x half> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }