IR: Add immarg attribute

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 12 Mar 2019 21:02:54 +0000 (21:02 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 12 Mar 2019 21:02:54 +0000 (21:02 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 12 Mar 2019 21:02:54 +0000 (21:02 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 12 Mar 2019 21:02:54 +0000 (21:02 +0000)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst

index 261d1d47d20ea818a5a0cb50b889e17455c7ea45..009f8554aef5c8906ae6a10a5585e4989c148ed5 100644 (file)
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1178,6 +1178,13 @@ Currently, only the following parameter attributes are defined:
      does not alias any other memory visible within a function and that a
      ``swifterror`` alloca passed as an argument does not escape.
  
+``immarg``
+    This indicates the parameter is required to be an immediate
+    value. This must be a trivial immediate integer or floating-point
+    constant. Undef or constant expressions are not valid. This is
+    only valid on intrinsic declarations and cannot be applied to a
+    call site or arbitrary function.
+
  .. _gc:
  
  Garbage Collector Strategy Names
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst

index ff116b655593bf500e2ee231c2c219b8ad807ee8..e0cebf508c45894d7e047544c0611d2a2ae89a51 100644 (file)
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -58,6 +58,10 @@ Non-comprehensive list of changes in this release
  Changes to the LLVM IR
  ----------------------
  
+* Added ``immarg`` parameter attribute. This indicates an intrinsic
+  parameter is required to be a simple constant. This annotation must
+  be accurate to avoid possible miscompiles.
+
  
  Changes to the ARM Backend
  --------------------------
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h

index 71148ac55c3a3585d13ebd55365b0ffae59169fb..9a66ad772da322c46f72a329703717a25ff1d58d 100644 (file)
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -605,6 +605,7 @@ enum AttributeKindCodes {
    ATTR_KIND_OPT_FOR_FUZZING = 57,
    ATTR_KIND_SHADOWCALLSTACK = 58,
    ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59,
+  ATTR_KIND_IMMARG = 60
  };
  
  enum ComdatSelectionKindCodes {
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td

index e786d85d05a870766a4c2df6f9cc9431a1cae87e..a2452f379d318114177bdf4942ddac5f633c1048 100644 (file)
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -130,6 +130,9 @@ def ReadOnly : EnumAttr<"readonly">;
  /// Return value is always equal to this argument.
  def Returned : EnumAttr<"returned">;
  
+/// Parameter is required to be a trivial constant.
+def ImmArg : EnumAttr<"immarg">;
+
  /// Function can return twice.
  def ReturnsTwice : EnumAttr<"returns_twice">;
  
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td

index 090ee60ede76a3dab6054f68f655d39534351a6b..680fd145f7523ce67761c551f31cc7859dda6599 100644 (file)
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -69,6 +69,11 @@ class Returned<int argNo> : IntrinsicProperty {
    int ArgNo = argNo;
  }
  
+// ImmArg - The specified argument must be an immediate.
+class ImmArg<int argNo> : IntrinsicProperty {
+  int ArgNo = argNo;
+}
+
  // ReadOnly - The specified argument pointer is not written to through the
  // pointer by the intrinsic.
  class ReadOnly<int argNo> : IntrinsicProperty {
@@ -397,9 +402,9 @@ def int_objc_arc_annotation_bottomup_bbend  : Intrinsic<[],
  
  //===--------------------- Code Generator Intrinsics ----------------------===//
  //
-def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
  def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-def int_frameaddress  : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
  def int_sponentry  : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
  def int_read_register  : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
                                     [IntrReadMem], "llvm.read_register">;
@@ -417,7 +422,7 @@ def int_localescape : Intrinsic<[], [llvm_vararg_ty]>;
  // to an escaped allocation indicated by the index.
  def int_localrecover : Intrinsic<[llvm_ptr_ty],
                                   [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
-                                 [IntrNoMem]>;
+                                 [IntrNoMem, ImmArg<2>]>;
  
  // Given the frame pointer passed into an SEH filter function, returns a
  // pointer to the local variable area suitable for use with llvm.localrecover.
@@ -443,7 +448,8 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
  // memory while not impeding optimization.
  def int_prefetch
      : Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
-                [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0> ]>;
+                [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+                  ImmArg<1>, ImmArg<2>]>;
  def int_pcmarker      : Intrinsic<[], [llvm_i32_ty]>;
  
  def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
@@ -484,16 +490,17 @@ def int_memcpy  : Intrinsic<[],
                               [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
                                llvm_i1_ty],
                              [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
-                             WriteOnly<0>, ReadOnly<1>]>;
+                             WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
  def int_memmove : Intrinsic<[],
                              [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
                               llvm_i1_ty],
                              [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
-                             ReadOnly<1>]>;
+                             ReadOnly<1>, ImmArg<3>]>;
  def int_memset  : Intrinsic<[],
                              [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
                               llvm_i1_ty],
-                            [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+                            [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>,
+                            ImmArg<3>]>;
  
  // FIXME: Add version of these floating point intrinsics which allow non-default
  // rounding modes and FP exception handling.
@@ -560,7 +567,7 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
  def int_objectsize : Intrinsic<[llvm_anyint_ty],
                                 [llvm_anyptr_ty, llvm_i1_ty,
                                  llvm_i1_ty, llvm_i1_ty],
-                               [IntrNoMem, IntrSpeculatable]>,
+                               [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>,
                                 GCCBuiltin<"__builtin_object_size">;
  
  //===--------------- Constrained Floating Point Intrinsics ----------------===//
@@ -687,8 +694,8 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
  
  //===------------------------- Expect Intrinsics --------------------------===//
  //
-def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
-                                              LLVMMatchType<0>], [IntrNoMem]>;
+def int_expect : Intrinsic<[llvm_anyint_ty],
+  [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, ImmArg<1>]>;
  
  //===-------------------- Bit Manipulation Intrinsics ---------------------===//
  //
@@ -697,8 +704,6 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
  let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
    def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
    def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
-  def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
-  def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
    def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
    def int_fshl : Intrinsic<[llvm_anyint_ty],
        [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
@@ -706,6 +711,11 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
        [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
  }
  
+let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in {
+  def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+  def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+}
+
  //===------------------------ Debugger Intrinsics -------------------------===//
  //
  
@@ -848,27 +858,27 @@ def int_usub_sat : Intrinsic<[llvm_anyint_ty],
  //
  def int_smul_fix : Intrinsic<[llvm_anyint_ty],
                               [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
-                             [IntrNoMem, IntrSpeculatable, Commutative]>;
+                             [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
  
  def int_umul_fix : Intrinsic<[llvm_anyint_ty],
                               [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
-                             [IntrNoMem, IntrSpeculatable, Commutative]>;
+                             [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
  
  //===------------------------- Memory Use Markers -------------------------===//
  //
  def int_lifetime_start  : Intrinsic<[],
                                      [llvm_i64_ty, llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<1>]>;
+                                    [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
  def int_lifetime_end    : Intrinsic<[],
                                      [llvm_i64_ty, llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<1>]>;
+                                    [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
  def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
                                      [llvm_i64_ty, llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<1>]>;
+                                    [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
  def int_invariant_end   : Intrinsic<[],
                                      [llvm_descriptor_ty, llvm_i64_ty,
                                       llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<2>]>;
+                                    [IntrArgMemOnly, NoCapture<2>, ImmArg<1>]>;
  
  // launder.invariant.group can't be marked with 'readnone' (IntrNoMem),
  // because it would cause CSE of two barriers with the same argument.
@@ -915,13 +925,13 @@ def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty],
                                 [llvm_i64_ty, llvm_i32_ty,
                                  llvm_anyptr_ty, llvm_i32_ty,
                                  llvm_i32_ty, llvm_vararg_ty],
-                                [Throws]>;
+                                [Throws, ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>]>;
  
  def int_experimental_gc_result   : Intrinsic<[llvm_any_ty], [llvm_token_ty],
                                               [IntrReadMem]>;
  def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
                                  [llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
-                                [IntrReadMem]>;
+                                [IntrReadMem, ImmArg<1>, ImmArg<2>]>;
  
  //===------------------------ Coroutine Intrinsics ---------------===//
  // These are documented in docs/Coroutines.rst
@@ -1018,23 +1028,24 @@ def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
                                        LLVMAnyPointerType<LLVMMatchType<0>>,
                                        llvm_i32_ty,
                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-                                 [IntrArgMemOnly]>;
+                                 [IntrArgMemOnly, ImmArg<2>]>;
  
  def int_masked_load  : Intrinsic<[llvm_anyvector_ty],
                                   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
                                    LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
-                                 [IntrReadMem, IntrArgMemOnly]>;
+                                 [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
  
  def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
                                   [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
                                    LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                    LLVMMatchType<0>],
-                                 [IntrReadMem]>;
+                                 [IntrReadMem, ImmArg<1>]>;
  
  def int_masked_scatter: Intrinsic<[],
                                    [llvm_anyvector_ty,
                                     LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
-                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>]>;
+                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                                   [ImmArg<2>]>;
  
  def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
                                       [LLVMPointerToElt<0>,
@@ -1065,7 +1076,7 @@ def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                   [IntrReadMem, IntrArgMemOnly]>;
  
  def int_hwasan_check_memaccess :
-  Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly]>;
+  Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>;
  
  // Xray intrinsics
  //===----------------------------------------------------------------------===//
@@ -1090,7 +1101,7 @@ def int_memcpy_element_unordered_atomic
                  ],
                  [
                    IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
-                  ReadOnly<1>
+                  ReadOnly<1>, ImmArg<3>
                  ]>;
  
  // @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize)
@@ -1101,13 +1112,13 @@ def int_memmove_element_unordered_atomic
                  ],
                  [
                    IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
-                  ReadOnly<1>
+                  ReadOnly<1>, ImmArg<3>
                  ]>;
  
  // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
  def int_memset_element_unordered_atomic
      : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
-                [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
+                [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>;
  
  //===------------------------ Reduction Intrinsics ------------------------===//
  //
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td

index 5b2c7c8c7008c3f1cb7787a227224e1607c9e2c9..46e025d5ec484e7a5f5d8b52b84d257bae49aff6 100644 (file)
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -177,7 +177,7 @@ def int_amdgcn_implicit_buffer_ptr :
  // This is always moved to the beginning of the basic block.
  def int_amdgcn_init_exec : Intrinsic<[],
    [llvm_i64_ty],      // 64-bit literal constant
-  [IntrConvergent]>;
+  [IntrConvergent, ImmArg<0>]>;
  
  // Set EXEC according to a thread count packed in an SGPR input:
  //    thread_count = (input >> bitoffset) & 0x7f;
@@ -195,9 +195,9 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
  // The first parameter is s_sendmsg immediate (i16),
  // the second one is copied to m0
  def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
-  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
  def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
-  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
  
  def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
    Intrinsic<[], [], [IntrConvergent]>;
@@ -206,7 +206,7 @@ def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
    Intrinsic<[], [], [IntrConvergent]>;
  
  def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
-  Intrinsic<[], [llvm_i32_ty], []>;
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
  
  def int_amdgcn_div_scale : Intrinsic<
    // 1st parameter: Numerator
@@ -215,7 +215,7 @@ def int_amdgcn_div_scale : Intrinsic<
    //                second. (0 = first, 1 = second).
    [llvm_anyfloat_ty, llvm_i1_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-  [IntrNoMem, IntrSpeculatable]
+  [IntrNoMem, IntrSpeculatable, ImmArg<2>]
  >;
  
  def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
@@ -373,7 +373,7 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
    llvm_i32_ty, // ordering
    llvm_i32_ty, // scope
    llvm_i1_ty], // isVolatile
-  [IntrArgMemOnly, NoCapture<0>], "",
+  [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "",
    [SDNPMemOperand]
  >;
  
@@ -388,7 +388,7 @@ class AMDGPULDSF32Intrin<string clang_builtin> :
      llvm_i32_ty, // ordering
      llvm_i32_ty, // scope
      llvm_i1_ty], // isVolatile
-    [IntrArgMemOnly, NoCapture<0>]
+    [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>]
  >;
  
  class AMDGPUDSOrderedIntrinsic : Intrinsic<
@@ -403,14 +403,17 @@ class AMDGPUDSOrderedIntrinsic : Intrinsic<
     llvm_i32_ty, // ordered count index (OA index), also added to the address
     llvm_i1_ty,  // wave release, usually set to 1
     llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
-  [NoCapture<0>]
+  [NoCapture<0>,
+   ImmArg<2>, ImmArg<3>, ImmArg<4>,
+   ImmArg<5>, ImmArg<6>, ImmArg<7>
+  ]
  >;
  
  class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
    [llvm_i32_ty],
    [llvm_anyptr_ty, // LDS or GDS ptr
     llvm_i1_ty], // isVolatile
-   [IntrConvergent, IntrArgMemOnly, NoCapture<0>]
+   [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>]
  >;
  
  def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
@@ -649,6 +652,19 @@ class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RES
    let LodClampMip = "mip";
  }
  
+// Helper class for figuring out image intrinsic argument indexes.
+class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
+  int NumDataArgs = !size(P_.DataArgs);
+  int NumDmaskArgs = !if(P_.IsAtomic, 0, 1);
+  int NumVAddrArgs = !size(P_.AddrArgs);
+  int NumRSrcArgs = 1;
+  int NumSampArgs = !if(P_.IsSample, 2, 0);
+  int DmaskArgIndex = NumDataArgs;
+  int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
+  int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
+  int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
+}
+
  // All dimension-aware intrinsics are derived from this class.
  class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                                list<IntrinsicProperty> props,
@@ -663,7 +679,12 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                          llvm_i1_ty], []),        // unorm(imm)
        [llvm_i32_ty,                              // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
         llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-      props, "", sdnodeprops>,
+     !listconcat(props,
+          !if(P_.IsAtomic, [], [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>]),
+          !if(P_.IsSample, [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>], []),
+          [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>,
+           ImmArg<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>]),
+      "", sdnodeprops>,
    AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
                             !if(P_.IsAtomic, 0, 1)), 1> {
    AMDGPUDimProfile P = P_;
@@ -825,7 +846,7 @@ class AMDGPUBufferLoad : Intrinsic <
     llvm_i32_ty,       // offset(SGPR/VGPR/imm)
     llvm_i1_ty,        // glc(imm)
     llvm_i1_ty],       // slc(imm)
-  [IntrReadMem], "", [SDNPMemOperand]>,
+  [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<0>;
  def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
  def int_amdgcn_buffer_load : AMDGPUBufferLoad;
@@ -835,7 +856,7 @@ def int_amdgcn_s_buffer_load : Intrinsic <
    [llvm_v4i32_ty,     // rsrc(SGPR)
     llvm_i32_ty,       // byte offset(SGPR/VGPR/imm)
     llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc)
-  [IntrNoMem]>,
+  [IntrNoMem, ImmArg<2>]>,
    AMDGPURsrcIntrinsic<0>;
  
  class AMDGPUBufferStore : Intrinsic <
@@ -846,7 +867,7 @@ class AMDGPUBufferStore : Intrinsic <
     llvm_i32_ty,       // offset(SGPR/VGPR/imm)
     llvm_i1_ty,        // glc(imm)
     llvm_i1_ty],       // slc(imm)
-  [IntrWriteMem], "", [SDNPMemOperand]>,
+  [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1>;
  def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
  def int_amdgcn_buffer_store : AMDGPUBufferStore;
@@ -864,7 +885,7 @@ class AMDGPURawBufferLoad : Intrinsic <
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrReadMem], "", [SDNPMemOperand]>,
+  [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<0>;
  def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
  def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
@@ -876,7 +897,7 @@ class AMDGPUStructBufferLoad : Intrinsic <
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrReadMem], "", [SDNPMemOperand]>,
+  [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<0>;
  def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
  def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
@@ -888,7 +909,7 @@ class AMDGPURawBufferStore : Intrinsic <
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrWriteMem], "", [SDNPMemOperand]>,
+  [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1>;
  def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
  def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
@@ -901,7 +922,7 @@ class AMDGPUStructBufferStore : Intrinsic <
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrWriteMem], "", [SDNPMemOperand]>,
+  [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1>;
  def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
  def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
@@ -913,7 +934,7 @@ class AMDGPURawBufferAtomic : Intrinsic <
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<4>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1, 0>;
  def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
  def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
@@ -933,7 +954,7 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<5>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<2, 0>;
  
  class AMDGPUStructBufferAtomic : Intrinsic <
@@ -944,7 +965,7 @@ class AMDGPUStructBufferAtomic : Intrinsic <
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<5>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1, 0>;
  def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
  def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
@@ -965,7 +986,7 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
     llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<6>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<2, 0>;
  
  // Obsolescent tbuffer intrinsics.
@@ -980,7 +1001,8 @@ def int_amdgcn_tbuffer_load : Intrinsic <
       llvm_i32_ty,     // nfmt(imm)
       llvm_i1_ty,     // glc(imm)
       llvm_i1_ty],    // slc(imm)
-    [IntrReadMem], "", [SDNPMemOperand]>,
+    [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>,
+     ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<0>;
  
  def int_amdgcn_tbuffer_store : Intrinsic <
@@ -995,7 +1017,8 @@ def int_amdgcn_tbuffer_store : Intrinsic <
       llvm_i32_ty,    // nfmt(imm)
       llvm_i1_ty,     // glc(imm)
       llvm_i1_ty],    // slc(imm)
-    [IntrWriteMem], "", [SDNPMemOperand]>,
+    [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>,
+     ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1>;
  
  // New tbuffer intrinsics, with:
@@ -1009,7 +1032,7 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic <
       llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
       llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
       llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrReadMem], "", [SDNPMemOperand]>,
+    [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<0>;
  
  def int_amdgcn_raw_tbuffer_store : Intrinsic <
@@ -1020,7 +1043,7 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic <
       llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
       llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
       llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrWriteMem], "", [SDNPMemOperand]>,
+    [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1>;
  
  def int_amdgcn_struct_tbuffer_load : Intrinsic <
@@ -1031,7 +1054,7 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic <
       llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
       llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
       llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrReadMem], "", [SDNPMemOperand]>,
+    [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<0>;
  
  def int_amdgcn_struct_tbuffer_store : Intrinsic <
@@ -1043,7 +1066,7 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic <
       llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
       llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
       llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrWriteMem], "", [SDNPMemOperand]>,
+    [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1>;
  
  class AMDGPUBufferAtomic : Intrinsic <
@@ -1053,7 +1076,7 @@ class AMDGPUBufferAtomic : Intrinsic <
     llvm_i32_ty,       // vindex(VGPR)
     llvm_i32_ty,       // offset(SGPR/VGPR/imm)
     llvm_i1_ty],       // slc(imm)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<4>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<1, 0>;
  def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
  def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
@@ -1073,7 +1096,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
     llvm_i32_ty,       // vindex(VGPR)
     llvm_i32_ty,       // offset(SGPR/VGPR/imm)
     llvm_i1_ty],       // slc(imm)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<5>], "", [SDNPMemOperand]>,
    AMDGPURsrcIntrinsic<2, 0>;
  
  } // defset AMDGPUBufferIntrinsics
@@ -1090,7 +1113,7 @@ def int_amdgcn_exp : Intrinsic <[], [
    llvm_i1_ty,        // done
    llvm_i1_ty         // vm
    ],
-  []
+  [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>]
  >;
  
  // exp with compr bit set.
@@ -1101,7 +1124,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
    LLVMMatchType<0>,  // src1
    llvm_i1_ty,        // done
    llvm_i1_ty],       // vm
-  []
+  [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>]
  >;
  
  def int_amdgcn_buffer_wbinvl1_sc :
@@ -1122,23 +1145,23 @@ def int_amdgcn_s_memtime :
  
  def int_amdgcn_s_sleep :
    GCCBuiltin<"__builtin_amdgcn_s_sleep">,
-  Intrinsic<[], [llvm_i32_ty], []> {
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
  }
  
  def int_amdgcn_s_incperflevel :
    GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
-  Intrinsic<[], [llvm_i32_ty], []> {
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
  }
  
  def int_amdgcn_s_decperflevel :
    GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
-  Intrinsic<[], [llvm_i32_ty], []> {
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
  }
  
  def int_amdgcn_s_getreg :
    GCCBuiltin<"__builtin_amdgcn_s_getreg">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
-  [IntrReadMem, IntrSpeculatable]
+  [IntrReadMem, IntrSpeculatable, ImmArg<0>]
  >;
  
  // int_amdgcn_s_getpc is provided to allow a specific style of position
@@ -1208,7 +1231,8 @@ def int_amdgcn_mbcnt_hi :
  // llvm.amdgcn.ds.swizzle src offset
  def int_amdgcn_ds_swizzle :
    GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem, IntrConvergent, ImmArg<1>]>;
  
  def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
@@ -1276,11 +1300,11 @@ def int_amdgcn_cvt_pk_u8_f32 :
  
  def int_amdgcn_icmp :
    Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
-            [IntrNoMem, IntrConvergent]>;
+            [IntrNoMem, IntrConvergent, ImmArg<2>]>;
  
  def int_amdgcn_fcmp :
    Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
-            [IntrNoMem, IntrConvergent]>;
+            [IntrNoMem, IntrConvergent, ImmArg<2>]>;
  
  def int_amdgcn_readfirstlane :
    GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
@@ -1370,7 +1394,8 @@ def int_amdgcn_buffer_wbinvl1_vol :
  def int_amdgcn_mov_dpp :
    Intrinsic<[llvm_anyint_ty],
              [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-             llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
+             llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>,
+                           ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
  
  // llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
  // Should be equivalent to:
@@ -1378,8 +1403,10 @@ def int_amdgcn_mov_dpp :
  // v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
  def int_amdgcn_update_dpp :
    Intrinsic<[llvm_anyint_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
-             llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
+            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
+            llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
+             [IntrNoMem, IntrConvergent,
+              ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
  
  def int_amdgcn_s_dcache_wb :
    GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
@@ -1419,7 +1446,7 @@ def int_amdgcn_fdot2 :
        llvm_float_ty, // %c
        llvm_i1_ty     // %clamp
      ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
    >;
  
  // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
@@ -1434,7 +1461,7 @@ def int_amdgcn_sdot2 :
        llvm_i32_ty,   // %c
        llvm_i1_ty     // %clamp
      ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
    >;
  
  // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
@@ -1449,7 +1476,7 @@ def int_amdgcn_udot2 :
        llvm_i32_ty,   // %c
        llvm_i1_ty     // %clamp
      ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
    >;
  
  // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
@@ -1464,7 +1491,7 @@ def int_amdgcn_sdot4 :
        llvm_i32_ty, // %c
        llvm_i1_ty   // %clamp
      ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
    >;
  
  // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
@@ -1479,7 +1506,7 @@ def int_amdgcn_udot4 :
        llvm_i32_ty, // %c
        llvm_i1_ty   // %clamp
      ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
    >;
  
  // i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
@@ -1495,7 +1522,7 @@ def int_amdgcn_sdot8 :
        llvm_i32_ty, // %c
        llvm_i1_ty   // %clamp
      ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
    >;
  
  // u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
@@ -1511,7 +1538,7 @@ def int_amdgcn_udot8 :
        llvm_i32_ty, // %c
        llvm_i1_ty   // %clamp
      ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
    >;
  
  //===----------------------------------------------------------------------===//
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp

index 461117c92b86f5255d8a947ef246dc3334a6642e..9e5bfb1f72b29becb065b5960b8f5cd709b54def 100644 (file)
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -684,6 +684,7 @@ lltok::Kind LLLexer::LexIdentifier() {
    KEYWORD(uwtable);
    KEYWORD(writeonly);
    KEYWORD(zeroext);
+  KEYWORD(immarg);
  
    KEYWORD(type);
    KEYWORD(opaque);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp

index 2a36bb189840964e1fcd3975412dfc6997aeb4be..cba579d59a430ae08cbb36a5d43bc56e753f6c1b 100644 (file)
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1310,6 +1310,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
      case lltok::kw_sret:
      case lltok::kw_swifterror:
      case lltok::kw_swiftself:
+    case lltok::kw_immarg:
        HaveError |=
          Error(Lex.getLoc(),
                "invalid use of parameter-only attribute on a function");
@@ -1603,6 +1604,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
      case lltok::kw_swiftself:       B.addAttribute(Attribute::SwiftSelf); break;
      case lltok::kw_writeonly:       B.addAttribute(Attribute::WriteOnly); break;
      case lltok::kw_zeroext:         B.addAttribute(Attribute::ZExt); break;
+    case lltok::kw_immarg:          B.addAttribute(Attribute::ImmArg); break;
  
      case lltok::kw_alignstack:
      case lltok::kw_alwaysinline:
@@ -1697,6 +1699,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
      case lltok::kw_sret:
      case lltok::kw_swifterror:
      case lltok::kw_swiftself:
+    case lltok::kw_immarg:
        HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
        break;
  
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h

index 88eeae11a4b663385f7d27335b44e177a4d7eaae..50bdf05092a83cfee49794a41671b9e1d6e654d5 100644 (file)
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -226,6 +226,7 @@ enum Kind {
    kw_uwtable,
    kw_writeonly,
    kw_zeroext,
+  kw_immarg,
  
    kw_type,
    kw_opaque,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp

index b94bb66a0d66c1043aba7b90f80acfb512b640d1..b9f93dc522fab04a68954543bf992c9950437e5c 100644 (file)
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1188,6 +1188,8 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
    case Attribute::ShadowCallStack: return 1ULL << 59;
    case Attribute::SpeculativeLoadHardening:
      return 1ULL << 60;
+  case Attribute::ImmArg:
+    return 1ULL << 61;
    case Attribute::Dereferenceable:
      llvm_unreachable("dereferenceable attribute not supported in raw format");
      break;
@@ -1424,6 +1426,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
      return Attribute::WriteOnly;
    case bitc::ATTR_KIND_Z_EXT:
      return Attribute::ZExt;
+  case bitc::ATTR_KIND_IMMARG:
+    return Attribute::ImmArg;
    }
  }
  
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp

index a15ad55b0a3e49a5be8a891a752bf0efc61f0919..1d6480c574503bd2ef16fea5d680721fc2124292 100644 (file)
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -712,6 +712,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
      return bitc::ATTR_KIND_WRITEONLY;
    case Attribute::ZExt:
      return bitc::ATTR_KIND_Z_EXT;
+  case Attribute::ImmArg:
+    return bitc::ATTR_KIND_IMMARG;
    case Attribute::EndAttrKinds:
      llvm_unreachable("Can not encode end-attribute kinds marker.");
    case Attribute::None:
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp

index a474d5740fea7e381c2bf8b80c524e54670cc5e5..0be09a05e82abb24786d2296bc5639c2e03ca321 100644 (file)
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -350,6 +350,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
      return "zeroext";
    if (hasAttribute(Attribute::Cold))
      return "cold";
+  if (hasAttribute(Attribute::ImmArg))
+    return "immarg";
  
    // FIXME: These should be output like this:
    //
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp

index 55d3a3236108824eadc4441f9bdeda2fe95778ab..44095bf05e19e2d0bac33fb020f04e522e6d1f3e 100644 (file)
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -500,7 +500,7 @@ private:
                              const Value *V);
    void verifyParameterAttrs(AttributeSet Attrs, Type *Ty, const Value *V);
    void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
-                           const Value *V);
+                           const Value *V, bool IsIntrinsic);
    void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
  
    void visitConstantExprsRecursively(const Constant *EntryC);
@@ -1562,6 +1562,11 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
  
    verifyAttributeTypes(Attrs, /*IsFunction=*/false, V);
  
+  if (Attrs.hasAttribute(Attribute::ImmArg)) {
+    Assert(Attrs.getNumAttributes() == 1,
+           "Attribute 'immarg' is incompatible with other attributes", V);
+  }
+
    // Check for mutually incompatible attributes.  Only inreg is compatible with
    // sret.
    unsigned AttrCount = 0;
@@ -1649,7 +1654,7 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
  // Check parameter attributes against a function type.
  // The value V is printed in error messages.
  void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
-                                   const Value *V) {
+                                   const Value *V, bool IsIntrinsic) {
    if (Attrs.isEmpty())
      return;
  
@@ -1686,6 +1691,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
      Type *Ty = FT->getParamType(i);
      AttributeSet ArgAttrs = Attrs.getParamAttributes(i);
  
+    if (!IsIntrinsic) {
+      Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
+             "immarg attribute only applies to intrinsics",V);
+    }
+
      verifyParameterAttrs(ArgAttrs, Ty, V);
  
      if (ArgAttrs.hasAttribute(Attribute::Nest)) {
@@ -1904,16 +1914,8 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
           "reordering restrictions required by safepoint semantics",
           Call);
  
-  const Value *IDV = Call.getArgOperand(0);
-  Assert(isa<ConstantInt>(IDV), "gc.statepoint ID must be a constant integer",
-         Call);
-
-  const Value *NumPatchBytesV = Call.getArgOperand(1);
-  Assert(isa<ConstantInt>(NumPatchBytesV),
-         "gc.statepoint number of patchable bytes must be a constant integer",
-         Call);
    const int64_t NumPatchBytes =
-      cast<ConstantInt>(NumPatchBytesV)->getSExtValue();
+      cast<ConstantInt>(Call.getArgOperand(1))->getSExtValue();
    assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
    Assert(NumPatchBytes >= 0,
           "gc.statepoint number of patchable bytes must be "
@@ -1926,12 +1928,7 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
           "gc.statepoint callee must be of function pointer type", Call, Target);
    FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
  
-  const Value *NumCallArgsV = Call.getArgOperand(3);
-  Assert(isa<ConstantInt>(NumCallArgsV),
-         "gc.statepoint number of arguments to underlying call "
-         "must be constant integer",
-         Call);
-  const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
+  const int NumCallArgs = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
    Assert(NumCallArgs >= 0,
           "gc.statepoint number of arguments to underlying call "
           "must be positive",
@@ -1950,10 +1947,8 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
      Assert(NumCallArgs == NumParams,
             "gc.statepoint mismatch in number of call args", Call);
  
-  const Value *FlagsV = Call.getArgOperand(4);
-  Assert(isa<ConstantInt>(FlagsV),
-         "gc.statepoint flags must be constant integer", Call);
-  const uint64_t Flags = cast<ConstantInt>(FlagsV)->getZExtValue();
+  const uint64_t Flags
+    = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
    Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
           "unknown flag used in gc.statepoint flags argument", Call);
  
@@ -2130,8 +2125,11 @@ void Verifier::visitFunction(const Function &F) {
    Assert(verifyAttributeCount(Attrs, FT->getNumParams()),
           "Attribute after last parameter!", &F);
  
+  bool isLLVMdotName = F.getName().size() >= 5 &&
+                       F.getName().substr(0, 5) == "llvm.";
+
    // Check function attributes.
-  verifyFunctionAttrs(FT, Attrs, &F);
+  verifyFunctionAttrs(FT, Attrs, &F, isLLVMdotName);
  
    // On function declarations/definitions, we do not support the builtin
    // attribute. We do not check this in VerifyFunctionAttrs since that is
@@ -2170,9 +2168,6 @@ void Verifier::visitFunction(const Function &F) {
      break;
    }
  
-  bool isLLVMdotName = F.getName().size() >= 5 &&
-                       F.getName().substr(0, 5) == "llvm.";
-
    // Check that the argument values match the function type for this function...
    unsigned i = 0;
    for (const Argument &Arg : F.args()) {
@@ -2800,17 +2795,21 @@ void Verifier::visitCallBase(CallBase &Call) {
    Assert(verifyAttributeCount(Attrs, Call.arg_size()),
           "Attribute after last parameter!", Call);
  
+  bool IsIntrinsic = Call.getCalledFunction() &&
+                     Call.getCalledFunction()->getName().startswith("llvm.");
+
+  Function *Callee
+    = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
+
    if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) {
      // Don't allow speculatable on call sites, unless the underlying function
      // declaration is also speculatable.
-    Function *Callee =
-        dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
      Assert(Callee && Callee->isSpeculatable(),
             "speculatable attribute may not apply to call sites", Call);
    }
  
    // Verify call attributes.
-  verifyFunctionAttrs(FTy, Attrs, &Call);
+  verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic);
  
    // Conservatively check the inalloca argument.
    // We have a bug if we can find that there is an underlying alloca without
@@ -2825,7 +2824,7 @@ void Verifier::visitCallBase(CallBase &Call) {
    // For each argument of the callsite, if it has the swifterror argument,
    // make sure the underlying alloca/parameter it comes from has a swifterror as
    // well.
-  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
      if (Call.paramHasAttr(i, Attribute::SwiftError)) {
        Value *SwiftErrorArg = Call.getArgOperand(i);
        if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
@@ -2842,6 +2841,21 @@ void Verifier::visitCallBase(CallBase &Call) {
               Call);
      }
  
+    if (Attrs.hasParamAttribute(i, Attribute::ImmArg)) {
+      // Don't allow immarg on call sites, unless the underlying declaration
+      // also has the matching immarg.
+      Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
+             "immarg may not apply only to call sites",
+             Call.getArgOperand(i), Call);
+    }
+
+    if (Call.paramHasAttr(i, Attribute::ImmArg)) {
+      Value *ArgVal = Call.getArgOperand(i);
+      Assert(isa<ConstantInt>(ArgVal) || isa<ConstantFP>(ArgVal),
+             "immarg operand has non-immediate parameter", ArgVal, Call);
+    }
+  }
+
    if (FTy->isVarArg()) {
      // FIXME? is 'nest' even legal here?
      bool SawNest = false;
@@ -2891,8 +2905,7 @@ void Verifier::visitCallBase(CallBase &Call) {
    }
  
    // Verify that there's no metadata unless it's a direct call to an intrinsic.
-  if (!Call.getCalledFunction() ||
-      !Call.getCalledFunction()->getName().startswith("llvm.")) {
+  if (!IsIntrinsic) {
      for (Type *ParamTy : FTy->params()) {
        Assert(!ParamTy->isMetadataTy(),
               "Function has metadata parameter but isn't an intrinsic", Call);
@@ -4181,13 +4194,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
        "an array");
      break;
    }
-  case Intrinsic::ctlz:  // llvm.ctlz
-  case Intrinsic::cttz:  // llvm.cttz
-    Assert(isa<ConstantInt>(Call.getArgOperand(1)),
-           "is_zero_undef argument of bit counting intrinsics must be a "
-           "constant int",
-           Call);
-    break;
    case Intrinsic::experimental_constrained_fadd:
    case Intrinsic::experimental_constrained_fsub:
    case Intrinsic::experimental_constrained_fmul:
@@ -4243,9 +4249,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
               "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
               Call);
      }
-    Assert(isa<ConstantInt>(Call.getArgOperand(3)),
-           "isvolatile argument of memory intrinsics must be a constant int",
-           Call);
+
      break;
    }
    case Intrinsic::memcpy_element_unordered_atomic:
@@ -4254,11 +4258,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
      const auto *AMI = cast<AtomicMemIntrinsic>(&Call);
  
      ConstantInt *ElementSizeCI =
-        dyn_cast<ConstantInt>(AMI->getRawElementSizeInBytes());
-    Assert(ElementSizeCI,
-           "element size of the element-wise unordered atomic memory "
-           "intrinsic must be a constant int",
-           Call);
+        cast<ConstantInt>(AMI->getRawElementSizeInBytes());
      const APInt &ElementSizeVal = ElementSizeCI->getValue();
      Assert(ElementSizeVal.isPowerOf2(),
             "element size of the element-wise atomic memory intrinsic "
@@ -4313,28 +4313,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
             Call);
      break;
    case Intrinsic::prefetch:
-    Assert(isa<ConstantInt>(Call.getArgOperand(1)) &&
-               isa<ConstantInt>(Call.getArgOperand(2)) &&
-               cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
-               cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+    Assert(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
+           cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
             "invalid arguments to llvm.prefetch", Call);
      break;
    case Intrinsic::stackprotector:
      Assert(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
             "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
      break;
-  case Intrinsic::lifetime_start:
-  case Intrinsic::lifetime_end:
-  case Intrinsic::invariant_start:
-    Assert(isa<ConstantInt>(Call.getArgOperand(0)),
-           "size argument of memory use markers must be a constant integer",
-           Call);
-    break;
-  case Intrinsic::invariant_end:
-    Assert(isa<ConstantInt>(Call.getArgOperand(1)),
-           "llvm.invariant.end parameter #2 must be a constant integer", Call);
-    break;
-
    case Intrinsic::localescape: {
      BasicBlock *BB = Call.getParent();
      Assert(BB == &BB->getParent()->front(),
@@ -4359,9 +4345,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
             "llvm.localrecover first "
             "argument must be function defined in this module",
             Call);
-    auto *IdxArg = dyn_cast<ConstantInt>(Call.getArgOperand(2));
-    Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int",
-           Call);
+    auto *IdxArg = cast<ConstantInt>(Call.getArgOperand(2));
      auto &Entry = FrameEscapeInfo[Fn];
      Entry.second = unsigned(
          std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
@@ -4606,8 +4590,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
             "second operand of [us]mul_fix must be an int type or vector "
             "of ints");
  
-    auto *Op3 = dyn_cast<ConstantInt>(Call.getArgOperand(2));
-    Assert(Op3, "third argument of [us]mul_fix must be a constant integer");
+    auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
      Assert(Op3->getType()->getBitWidth() <= 32,
             "third argument of [us]mul_fix must fit within 32 bits");
  
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 69e05c311d2ed89d2fb2579e77c04b8f016bae6b..9205c20cefc957d65a2ce9bb7b129e24c5e4b243 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -920,8 +920,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
      Info.align = 0;
      Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
  
-    const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
-    if (!Vol || !Vol->isZero())
+    const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
+    if (!Vol->isZero())
        Info.flags |= MachineMemOperand::MOVolatile;
  
      return true;
@@ -934,8 +934,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
      Info.align = 0;
      Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
  
-    const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(1));
-    if (!Vol || !Vol->isZero())
+    const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
+    if (!Vol->isZero())
        Info.flags |= MachineMemOperand::MOVolatile;
  
      return true;
@@ -3741,10 +3741,7 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
  static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
                                    SDNode *N, SelectionDAG &DAG) {
    EVT VT = N->getValueType(0);
-  const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
-  if (!CD)
-    return DAG.getUNDEF(VT);
-
+  const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
    int CondCode = CD->getSExtValue();
    if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
        CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
@@ -3775,9 +3772,7 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
  static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
                                    SDNode *N, SelectionDAG &DAG) {
    EVT VT = N->getValueType(0);
-  const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
-  if (!CD)
-    return DAG.getUNDEF(VT);
+  const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
  
    int CondCode = CD->getSExtValue();
    if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
@@ -4618,9 +4613,7 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
  
  static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
                               SDValue *GLC, SDValue *SLC) {
-  auto CachePolicyConst = dyn_cast<ConstantSDNode>(CachePolicy.getNode());
-  if (!CachePolicyConst)
-    return false;
+  auto CachePolicyConst = cast<ConstantSDNode>(CachePolicy.getNode());
  
    uint64_t Value = CachePolicyConst->getZExtValue();
    SDLoc DL(CachePolicy);
@@ -4721,9 +4714,7 @@ static SDValue constructRetValue(SelectionDAG &DAG,
  
  static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
                           SDValue *LWE, bool &IsTexFail) {
-  auto TexFailCtrlConst = dyn_cast<ConstantSDNode>(TexFailCtrl.getNode());
-  if (!TexFailCtrlConst)
-    return false;
+  auto TexFailCtrlConst = cast<ConstantSDNode>(TexFailCtrl.getNode());
  
    uint64_t Value = TexFailCtrlConst->getZExtValue();
    if (Value) {
@@ -4786,9 +4777,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
      }
    } else {
      unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
-    auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
-    if (!DMaskConst)
-      return Op;
+    auto DMaskConst = cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
      DMask = DMaskConst->getZExtValue();
      DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
  
@@ -4902,9 +4891,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
      CtrlIdx = AddrIdx + NumVAddrs + 1;
    } else {
      auto UnormConst =
-        dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
-    if (!UnormConst)
-      return Op;
+        cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
  
      Unorm = UnormConst->getZExtValue() ? True : False;
      CtrlIdx = AddrIdx + NumVAddrs + 3;
@@ -5357,10 +5344,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
                         Op.getOperand(1), Op.getOperand(2));
    case Intrinsic::amdgcn_div_scale: {
-    // 3rd parameter required to be a constant.
-    const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
-    if (!Param)
-      return DAG.getMergeValues({ DAG.getUNDEF(VT), DAG.getUNDEF(MVT::i1) }, DL);
+    const ConstantSDNode *Param = cast<ConstantSDNode>(Op.getOperand(3));
  
      // Translate to the operands expected by the machine instruction. The
      // first parameter must be the same as the first instruction.
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp

index 02cf90613c64b35dc3ca90c0478502745ffe916e..dc8fd4766d49cd616c1381e7d8dff2f49d968819 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3576,10 +3576,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
    }
    case Intrinsic::amdgcn_exp:
    case Intrinsic::amdgcn_exp_compr: {
-    ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
-    if (!En) // Illegal.
-      break;
-
+    ConstantInt *En = cast<ConstantInt>(II->getArgOperand(1));
      unsigned EnBits = En->getZExtValue();
      if (EnBits == 0xf)
        break; // All inputs enabled.
@@ -3669,10 +3666,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
    }
    case Intrinsic::amdgcn_icmp:
    case Intrinsic::amdgcn_fcmp: {
-    const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
-    if (!CC)
-      break;
-
+    const ConstantInt *CC = cast<ConstantInt>(II->getArgOperand(2));
      // Guard against invalid arguments.
      int64_t CCVal = CC->getZExtValue();
      bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
@@ -3822,11 +3816,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
    case Intrinsic::amdgcn_update_dpp: {
      Value *Old = II->getArgOperand(0);
  
-    auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
-    auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
-    auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
-    if (!BC || !RM || !BM ||
-        BC->isZeroValue() ||
+    auto BC = cast<ConstantInt>(II->getArgOperand(5));
+    auto RM = cast<ConstantInt>(II->getArgOperand(3));
+    auto BM = cast<ConstantInt>(II->getArgOperand(4));
+    if (BC->isZeroValue() ||
          RM->getZExtValue() != 0xF ||
          BM->getZExtValue() != 0xF ||
          isa<UndefValue>(Old))
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

index 6f9aadea55dcf9439af629694836d12651708cfe..8e97da68819339c32f299f275c1743c55088faf6 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -982,10 +982,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
      // below.
      DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
    } else {
-    ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
-    if (!DMask)
-      return nullptr; // non-constant dmask is not supported by codegen
-
+    ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
      unsigned DMaskVal = DMask->getZExtValue() & 0xf;
  
      // Mask off values that are undefined because the dmask doesn't cover them
@@ -1639,12 +1636,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
        return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
      default: {
        if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) {
-        LLVM_DEBUG(
-          Value *TFC = II->getArgOperand(II->getNumOperands() - 2);
-          assert(!isa<ConstantInt>(TFC) ||
-                 dyn_cast<ConstantInt>(TFC)->getZExtValue() == 0);
-        );
-
+        assert(cast<ConstantInt>(
+                 II->getArgOperand(
+                   II->getNumOperands() - 2))->getZExtValue() == 0);
          return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
        }
  
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp

index 023ab76d0012d2c0d509b3fdbdee433b09d9706a..4db895feaa3434aa660f3335a84477f12aac4def 100644 (file)
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -798,6 +798,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
        case Attribute::SwiftSelf:
        case Attribute::WriteOnly:
        case Attribute::ZExt:
+      case Attribute::ImmArg:
        case Attribute::EndAttrKinds:
          continue;
        // Those attributes should be safe to propagate to the extracted function.
diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll

index bd2a779800a6eba61b649373258129fe6d8947ce..175bac812a01070c9ecac9cbf59f983fb57e9186 100644 (file)
--- a/test/Assembler/auto_upgrade_intrinsics.ll
+++ b/test/Assembler/auto_upgrade_intrinsics.ll
@@ -146,5 +146,5 @@ define void @tests.lifetime.start.end() {
  ; CHECK: declare i32 @llvm.objectsize.i32.p0i8
  
  
-; CHECK: declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
-; CHECK: declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+; CHECK: declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+; CHECK: declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
diff --git a/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll b/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll

new file mode 100644 (file)

index 0000000..59308ef
--- /dev/null
+++ b/test/Assembler/autoupgrade-invalid-mem-intrinsics.ll
@@ -0,0 +1,15 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check that remangling code doesn't fail on an intrinsic with wrong signature
+
+; CHECK: Attribute after last parameter!
+; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memset.i64
+declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind
+
+; CHECK: Attribute after last parameter!
+; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memcpy.i64
+declare void @llvm.memcpy.i64(i8* nocapture, i8, i64) nounwind
+
+; CHECK: Attribute after last parameter!
+; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memmove.i64
+declare void @llvm.memmove.i64(i8* nocapture, i8, i64) nounwind
diff --git a/test/Assembler/immarg-param-attribute.ll b/test/Assembler/immarg-param-attribute.ll

new file mode 100644 (file)

index 0000000..f31443a
--- /dev/null
+++ b/test/Assembler/immarg-param-attribute.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
+declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
+
+; CHECK: declare void @llvm.test.immarg.intrinsic.f32(float immarg)
+declare void @llvm.test.immarg.intrinsic.f32(float immarg)
+
+; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.i32(
+define void @call_llvm.test.immarg.intrinsic.i32() {
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 0)
+
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 0)
+
+  ; CHECK call void @llvm.test.immarg.intrinsic.i32(i32 1)
+  call void @llvm.test.immarg.intrinsic.i32(i32 1)
+
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 5)
+  call void @llvm.test.immarg.intrinsic.i32(i32 add (i32 2, i32 3))
+
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* null to i32))
+  ret void
+}
+
+; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.f32(
+define void @call_llvm.test.immarg.intrinsic.f32() {
+  ; CHECK: call void @llvm.test.immarg.intrinsic.f32(float 1.000000e+00)
+  call void @llvm.test.immarg.intrinsic.f32(float 1.0)
+  ret void
+}
+
+define void @on_callsite_and_declaration() {
+  ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0)
+  call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0)
+  ret void
+}
diff --git a/test/Assembler/invalid-immarg.ll b/test/Assembler/invalid-immarg.ll

new file mode 100644 (file)

index 0000000..65f6ba6
--- /dev/null
+++ b/test/Assembler/invalid-immarg.ll
@@ -0,0 +1,34 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.byval(i32* byval immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.inalloca(i32* inalloca immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.inreg(i32 inreg immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.nest(i32* nest immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.sret(i32* sret immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.zeroext(i32 zeroext immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.signext(i32 signext immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.returned(i32 returned immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.noalias(i32* noalias immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.readnone(i32* readnone immarg)
+
+; CHECK: Attribute 'immarg' is incompatible with other attributes
+declare void @llvm.immarg.readonly(i32* readonly immarg)
diff --git a/test/Assembler/invalid-immarg2.ll b/test/Assembler/invalid-immarg2.ll

new file mode 100644 (file)

index 0000000..f458910
--- /dev/null
+++ b/test/Assembler/invalid-immarg2.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: error: invalid use of parameter-only attribute on a function
+declare void @llvm.immarg.func() immarg
diff --git a/test/Assembler/invalid-immarg3.ll b/test/Assembler/invalid-immarg3.ll

new file mode 100644 (file)

index 0000000..c03d7fa
--- /dev/null
+++ b/test/Assembler/invalid-immarg3.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: error: invalid use of parameter-only attribute
+declare immarg i32 @llvm.immarg.retattr(i32)
diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll

index 3c5e86eee018552b481b02126a8bc587189dac3a..95cd49f8156fd472a44c56f67ea762fa647b35ac 100644 (file)
--- a/test/Bitcode/compatibility.ll
+++ b/test/Bitcode/compatibility.ll
@@ -1681,6 +1681,10 @@ define i8** @constexpr() {
    ret i8** getelementptr inbounds ({ [4 x i8*], [4 x i8*] }, { [4 x i8*], [4 x i8*] }* null, i32 0, inrange i32 1, i32 2)
  }
  
+; immarg attribute
+declare void @llvm.test.immarg.intrinsic(i32 immarg)
+; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg)
+
  ; CHECK: attributes #0 = { alignstack=4 }
  ; CHECK: attributes #1 = { alignstack=8 }
  ; CHECK: attributes #2 = { alwaysinline }
diff --git a/test/Bitcode/objectsize-upgrade-7.0.ll b/test/Bitcode/objectsize-upgrade-7.0.ll

index a7e6d497676e734f5c7d347ad305c53d01dd770b..7ed6f15f6839ab1a27f6b8177ad1fec131dfc36e 100644 (file)
--- a/test/Bitcode/objectsize-upgrade-7.0.ll
+++ b/test/Bitcode/objectsize-upgrade-7.0.ll
@@ -9,4 +9,4 @@ define void @callit(i8* %ptr) {
  }
  
  declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1)
-; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
+; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg)
diff --git a/test/Bitcode/upgrade-memory-intrinsics.ll b/test/Bitcode/upgrade-memory-intrinsics.ll

index df4cb2bcd36a59c8d21a71611d21656e142b4d26..1bdee928cca1cb53711f1e2053cadeb438e0cbc4 100644 (file)
--- a/test/Bitcode/upgrade-memory-intrinsics.ll
+++ b/test/Bitcode/upgrade-memory-intrinsics.ll
@@ -27,9 +27,9 @@ define void @test2(i8* %p1, i8* %p2, i8* %p3) {
    ret void
  }
  
-; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
-; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
-; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)
+; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg)
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly , i8* nocapture readonly, i64, i32, i1)
  declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll

index 722099fec33c18b5447927c55218e2b70f60a22f..6644449b14a87bb5d3eca4d962516fe4ae301482 100644 (file)
--- a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
+++ b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -70,8 +70,8 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> ad
  
  ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
  ; GCN-NOT: store_dword
-define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
-  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
+define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b) #0 {
+  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
    %bc = bitcast i64 %undef to <2 x i32>
    store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
    ret void
@@ -79,8 +79,8 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32>
  
  ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
  ; GCN-NOT: store_dword
-define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
-  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
+define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
    %bc = bitcast i64 %undef to <2 x i32>
    %elt1 = extractelement <2 x i32> %bc, i32 1
    store volatile i32 %elt1, i32 addrspace(1)* %out
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll

index 2616d84bcd9afa135c3824d1263c0bd32743f278..c4e631590d53e826ce30344e9f9bb2c1f2fa7145 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -12,34 +12,6 @@ declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1)
  
  declare i32 @llvm.amdgcn.workitem.id.x() #1
  
-; Make sure no crash on invalid non-constant
-; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32:
-; CIVI-DAG: s_mov_b32 m0
-; GFX9-NOT: m0
-define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
-; Make sure no crash on invalid non-constant
-; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32:
-; CIVI-DAG: s_mov_b32 m0
-; GFX9-NOT: m0
-define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
-; Make sure no crash on invalid non-constant
-; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32:
-define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
  ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
  ; CIVI-DAG: s_mov_b32 m0
  ; GFX9-NOT: m0
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll

index e36c02715b7004dbcf03cf7e43203f2045a00634..2a5e81a6dd6aefefc101262ddcc1d41eef188d14 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -395,41 +395,6 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)
    ret void
  }
  
-; Undefined selector gets deleted
-; SI-LABEL: {{^}}test_div_scale_f32_val_undef_undef:
-; SI-NOT: v_div_scale
-define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) #0 {
-  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef)
-  %result0 = extractvalue { float, i1 } %result, 0
-  store float %result0, float addrspace(1)* %out, align 4
-  ret void
-}
-
-; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_undef:
-; SI-NOT: v_div_scale
-define amdgpu_kernel void @test_div_scale_f32_undef_undef_undef(float addrspace(1)* %out) #0 {
-  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 undef)
-  %result0 = extractvalue { float, i1 } %result, 0
-  store float %result0, float addrspace(1)* %out, align 4
-  ret void
-}
-
-; SI-LABEL: {{^}}test_div_scale_f32_val_val_undef:
-; SI-NOT: v_div_scale
-define amdgpu_kernel void @test_div_scale_f32_val_val_undef(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
-
-  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 undef)
-  %result0 = extractvalue { float, i1 } %result, 0
-  store float %result0, float addrspace(1)* %out, align 4
-  ret void
-}
-
  ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
  ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
  ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll

index a6a402b93bf553675e4297e1bb92df2e0250fbf1..6ff66b94fdfed757da2eeccc3b437e498440cd40 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
@@ -8,14 +8,6 @@ declare float @llvm.fabs.f32(float) #0
  declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
  declare half @llvm.fabs.f16(half) #0
  
-; GCN-LABEL: {{^}}v_fcmp_f32_dynamic_cc:
-; GCN: s_endpgm
-define amdgpu_kernel void @v_fcmp_f32_dynamic_cc(i64 addrspace(1)* %out, float %src0, float %src1, i32 %cc) {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src0, float %src1, i32 %cc)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
  ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
  ; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
  define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll

index 06ac7da1288ae8ff496d79fc0033ab08bf9e2d93..8d5cdf1c27049fcbb6466c7b9d52db13f1ac7b02 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -6,15 +6,6 @@ declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
  declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
  declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
  
-; No crash on invalid input
-; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
-; GCN: s_endpgm
-define amdgpu_kernel void @v_icmp_i32_dynamic_cc(i64 addrspace(1)* %out, i32 %src, i32 %cc) {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 %cc)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
  ; GCN-LABEL: {{^}}v_icmp_i32_eq:
  ; GCN: v_cmp_eq_u32_e64
  define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
@@ -181,15 +172,6 @@ define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
    ret void
  }
  
-; GCN-LABEL: {{^}}v_icmp_i16_dynamic_cc:
-; GCN: s_endpgm
-define amdgpu_kernel void @v_icmp_i16_dynamic_cc(i64 addrspace(1)* %out, i16 %src, i32 %cc) {
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 %cc)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-; GCN-LABEL: {{^}}v_icmp_i16_eq:
  ; VI: v_cmp_eq_u16_e64
  
  ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll

index 51f047d9b64a111bb1228052d86abf22e1d83971..00da71e422f204b20630e54a05b05f031fdedabc 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
@@ -67,9 +67,6 @@ declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i
  declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
  declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
  declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i1, i1) #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readonly }
-
-
diff --git a/test/DebugInfo/MIR/X86/kill-after-spill.mir b/test/DebugInfo/MIR/X86/kill-after-spill.mir

index 5110dc349bea72a1fc9c2508b04ffbc0d1a50f35..88ed269c93d4e3c279dbba1c161b311b83290ba9 100644 (file)
--- a/test/DebugInfo/MIR/X86/kill-after-spill.mir
+++ b/test/DebugInfo/MIR/X86/kill-after-spill.mir
@@ -123,9 +123,6 @@
  
    declare i8* @memset(i8*, i32, i64) local_unnamed_addr
  
-  ; Function Attrs: nounwind readnone speculatable
-  declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1
-
    ; Function Attrs: nounwind readnone speculatable
    declare void @llvm.dbg.value(metadata, metadata, metadata) #1
  
diff --git a/test/LTO/X86/remangle_intrinsics.ll b/test/LTO/X86/remangle_intrinsics.ll

index 72ce8d3d8122d527f818ba5bb3689a7ba291748e..112cace0403b99094b3fc6115c1463ff9d2cf888 100644 (file)
--- a/test/LTO/X86/remangle_intrinsics.ll
+++ b/test/LTO/X86/remangle_intrinsics.ll
@@ -19,6 +19,3 @@ define void @foo(%struct.rtx_def* %a, i8 %b, i32 %c) {
  }
  
  declare void @llvm.memset.p0struct.rtx_def.i32(%struct.rtx_def*, i8, i32, i1)
-
-; Check that remangling code doesn't fail on an intrinsic with wrong signature
-declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind
diff --git a/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll

index 166e90ab7b698ded8ae45f66b28fa34e81588281..4694a1c5f8d535c1f6260c8c07bd991150c59357 100644 (file)
--- a/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
+++ b/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
@@ -125,15 +125,6 @@ define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i
    ret i64 %ret
  }
  
-; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
-define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 {
-  %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
-  %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
-  ret i64 %ret
-}
-
  declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) #1
  declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) #1
  declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
diff --git a/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll

index 6030d14c8e1ec5df675ce07cc284542e2a7d4d54..e0b8fab411c057f97ed75cefcef718f5289cc0cf 100644 (file)
--- a/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ b/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -320,229 +320,229 @@ declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i
  ; --------------------------------------------------------------------
  
  ; CHECK-LABEL: @raw_buffer_load_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret float %data
  }
  
  ; CHECK-LABEL: @raw_buffer_load_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret <1 x float> %data
  }
  
  ; CHECK-LABEL: @raw_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret <2 x float> %data
  }
  
  ; CHECK-LABEL: @raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret <4 x float> %data
  }
  
  ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <2 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 3
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <3 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
  ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
    %tmp2 = extractelement <4 x i32> %tmp1, i32 0
    ret i32 %tmp2
  }
  
  ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
  ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
    %tmp2 = extractelement <4 x float> %tmp1, i32 0
    ret float %tmp2
  }
  
  ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
@@ -560,229 +560,229 @@ declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #
  ; --------------------------------------------------------------------
  
  ; CHECK-LABEL: @raw_buffer_load_format_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret float %data
  }
  
  ; CHECK-LABEL: @raw_buffer_load_format_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret <1 x float> %data
  }
  
  ; CHECK-LABEL: @raw_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret <2 x float> %data
  }
  
  ; CHECK-LABEL: @raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    ret <4 x float> %data
  }
  
  ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <2 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 3
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <3 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
  ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
    %tmp2 = extractelement <4 x i32> %tmp1, i32 0
    ret i32 %tmp2
  }
  
  ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
  ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
    %tmp2 = extractelement <4 x float> %tmp1, i32 0
    ret float %tmp2
  }
  
  ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
@@ -800,229 +800,229 @@ declare <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32>, i32, i32,
  ; --------------------------------------------------------------------
  
  ; CHECK-LABEL: @struct_buffer_load_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret float %data
  }
  
  ; CHECK-LABEL: @struct_buffer_load_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret <1 x float> %data
  }
  
  ; CHECK-LABEL: @struct_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret <2 x float> %data
  }
  
  ; CHECK-LABEL: @struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret <4 x float> %data
  }
  
  ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <2 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 3
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <3 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
  ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
    %tmp2 = extractelement <4 x i32> %tmp1, i32 0
    ret i32 %tmp2
  }
  
  ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
  ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
    %tmp2 = extractelement <4 x float> %tmp1, i32 0
    ret float %tmp2
  }
  
  ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
@@ -1040,229 +1040,229 @@ declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32
  ; --------------------------------------------------------------------
  
  ; CHECK-LABEL: @struct_buffer_load_format_f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret float %data
  }
  
  ; CHECK-LABEL: @struct_buffer_load_format_v1f32(
-; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <1 x float> %data
-define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret <1 x float> %data
  }
  
  ; CHECK-LABEL: @struct_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret <2 x float> %data
  }
  
  ; CHECK-LABEL: @struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <4 x float> %data
-define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    ret <4 x float> %data
  }
  
  ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
-; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <2 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <4 x float> %data, i32 3
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
    ret <3 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt0 = extractelement <3 x float> %data, i32 0
    ret float %elt0
  }
  
  ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 1
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
  ; CHECK-NEXT: ret float %elt1
-define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %elt1 = extractelement <3 x float> %data, i32 2
    ret float %elt1
  }
  
  ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: ret <2 x float>
-define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
  ; CHECK-NEXT: ret <2 x float> %shuf
-define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
    ret <2 x float> %shuf
  }
  
  ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
-; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
  ; CHECK-NEXT: ret i32 %tmp2
-define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
    %tmp2 = extractelement <4 x i32> %tmp1, i32 0
    ret i32 %tmp2
  }
  
  ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4i32(
-; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
  ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
  ; CHECK-NEXT: ret float %tmp2
-define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
+define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
    %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
    %tmp2 = extractelement <4 x float> %tmp1, i32 0
    ret float %tmp2
  }
  
  ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
-  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
+define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
    %elt0 = extractelement <2 x float> %data, i32 0
    ret float %elt0
  }
@@ -1319,16 +1319,6 @@ define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
-; CHECK-NEXT: ret float %elt0
-define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
  ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
  ; CHECK-NEXT: ret float undef
  define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
@@ -2395,14 +2385,7 @@ define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8
    ret float %elt0
  }
  
-; Verify that we don't creash on non-constant operand.
-define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 zeroext, i32, i32) local_unnamed_addr {
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 %0, float %1, float %2, float %3, float %4, <8 x i32> %5, <4 x i32> %6, i1 zeroext %7, i32 %8, i32 %9) #1
-  ret <4 x half> %tmp
-}
-
  declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
  
  ; --------------------------------------------------------------------
  ; TFE / LWE
diff --git a/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

index b8e19e2bc94932485e099924b51d98cbc96088d8..a065d10946df2d6bff6f01971a16d3152d718738 100644 (file)
--- a/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1078,17 +1078,7 @@ define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
  ; llvm.amdgcn.exp
  ; --------------------------------------------------------------------
  
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
-
-; Make sure no crashing on invalid variable params
-; CHECK-LABEL: @exp_invalid_inputs(
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
-define void @exp_invalid_inputs(i32 %tgt, i32 %en) {
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  ret void
-}
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly
  
  ; CHECK-LABEL: @exp_disabled_inputs_to_undef(
  ; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
@@ -1136,16 +1126,7 @@ define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w
  ; llvm.amdgcn.exp.compr
  ; --------------------------------------------------------------------
  
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly
-
-; CHECK-LABEL: @exp_compr_invalid_inputs(
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
-define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) {
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  ret void
-}
+declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly
  
  ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
  ; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
@@ -1404,17 +1385,9 @@ define float @fmed3_0_1_undef_f32() {
  ; llvm.amdgcn.icmp
  ; --------------------------------------------------------------------
  
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
-declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent
-
-; Make sure there's no crash for invalid input
-; CHECK-LABEL: @invalid_nonconstant_icmp_code(
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
-define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
-  ret i64 %result
-}
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent
  
  ; CHECK-LABEL: @invalid_icmp_code(
  ; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
@@ -2012,15 +1985,7 @@ define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
  ; llvm.amdgcn.fcmp
  ; --------------------------------------------------------------------
  
-declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
-
-; Make sure there's no crash for invalid input
-; CHECK-LABEL: @invalid_nonconstant_fcmp_code(
-; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
-define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
-  ret i64 %result
-}
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent
  
  ; CHECK-LABEL: @invalid_fcmp_code(
  ; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
diff --git a/test/Transforms/LowerExpectIntrinsic/PR33346.ll b/test/Transforms/LowerExpectIntrinsic/PR33346.ll

index ca962fbdc8f3cca1a18ab2f246481947c81eef22..5bcf93408b1aae15e146a3a64f166a202ea23436 100644 (file)
--- a/test/Transforms/LowerExpectIntrinsic/PR33346.ll
+++ b/test/Transforms/LowerExpectIntrinsic/PR33346.ll
@@ -7,12 +7,12 @@ bb:
    store i64 %arg, i64* %tmp, align 8
    %tmp1 = load i64, i64* %tmp, align 8
    %tmp2 = load i64, i64* %tmp, align 8
-  %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2)
+  %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 123)
    ret i64 %tmp3
  }
  
  ; Function Attrs: nounwind readnone
-declare i64 @llvm.expect.i64(i64, i64)
+declare i64 @llvm.expect.i64(i64, i64 immarg)
  
  
  !llvm.module.flags = !{!0}
diff --git a/test/Verifier/AMDGPU/intrinsic-immarg.ll b/test/Verifier/AMDGPU/intrinsic-immarg.ll

new file mode 100644 (file)

index 0000000..5b0e5b3
--- /dev/null
+++ b/test/Verifier/AMDGPU/intrinsic-immarg.ll
@@ -0,0 +1,552 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1)
+define void @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false)
+  %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool)
+  %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool)
+  ret void
+}
+
+declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32)
+define void @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32)
+define void @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32)
+define void @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32)
+define void @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+define void @invalid_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask, i1 %bool, i32 %arg) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %dmask
+  ; CHECK-NEXT: %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0)
+  %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT:   %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0)
+  %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg
+  ; CHECK-NEXT:   %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg)
+  %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg)
+  ret void
+}
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
+define void @exp_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %en
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %tgt
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 %bool, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 %bool, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 %bool)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 %bool)
+  ret void
+}
+
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1)
+
+define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %en
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %tgt
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 %bool, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 %bool, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 false, i1 %bool)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 false, i1 %bool)
+  ret void
+}
+
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32)
+
+define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %c
+  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
+  ret i64 %result
+}
+
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32)
+define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %c
+  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
+  ret i64 %result
+}
+
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1)
+define amdgpu_kernel void @invalid_atomic_inc(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1)
+define amdgpu_kernel void @invalid_atomic_dec(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+  %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false)
+
+   ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+  %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool)
+  ret void
+}
+
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)
+define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i1 undef
+  ; CHECK: %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.000000e+00, float undef, i1 undef)
+  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef)
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+declare void @llvm.amdgcn.init.exec(i64)
+define amdgpu_ps void @init_exec(i64 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %var
+  ; CHECK-NEXT: call void @llvm.amdgcn.init.exec(i64 %var)
+  call void @llvm.amdgcn.init.exec(i64 %var)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.sendmsg(i32, i32)
+define void @sendmsg(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1)
+  %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.sendmsghalt(i32, i32)
+define void @sendmsghalt(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1)
+  %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.waitcnt(i32)
+define void @waitcnt(i32 %arg0) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.getreg(i32)
+define void @getreg(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.getreg(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.getreg(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.sleep(i32)
+define void @sleep(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sleep(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.sleep(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.incperflevel(i32)
+define void @incperflevel(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.incperflevel(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.incperflevel(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.s.decperflevel(i32)
+define void @decperflevel(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.decperflevel(i32 %arg0)
+  %val = call i32 @llvm.amdgcn.s.decperflevel(i32 %arg0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
+define void @ds_swizzle(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg1
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.ds.swizzle(i32 %arg0, i32 %arg1)
+  %val = call i32 @llvm.amdgcn.ds.swizzle(i32 %arg0, i32 %arg1)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+  %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+  %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+  %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+  %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+define amdgpu_kernel void @ds_ordered_swap(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+  %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+  %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+  %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+  %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1)
+define amdgpu_kernel void @mov_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 true)
+  %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 true)
+  %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 true)
+  %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool)
+  %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
+define amdgpu_kernel void @update_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 true)
+  %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 true)
+  %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 true)
+  %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 1)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool)
+  %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32)
+define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+  %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  ret void
+}
+
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32)
+define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %val) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+  %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val)
+  %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val)
+  ret void
+}
+
+declare {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+define amdgpu_ps void @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s, i32 %var, i1 %bool) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
+  %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %bool
+  ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0)
+  %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0)
+  %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val3 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var)
+  %val3 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32)
+define amdgpu_ps void @load_1d_a16(<8 x i32> inreg %rsrc, <2 x i16> %coords, i16 %s, i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+  %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32)
+define amdgpu_ps void @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 %data, i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var)
+  %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32)
+define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %val) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+  %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s, i32 %val) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+  %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %val
+  ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val)
+  ret void
+}
+
+declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1)
+define float @test_fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call float @llvm.amdgcn.fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3)
+  %val = call float @llvm.amdgcn.fdot2(<2 x half> %arg0, <2 x half> %arg1, float %arg2, i1 %arg3)
+  ret float %val
+}
+
+declare i32 @llvm.amdgcn.sdot2(<2 x i16>, <2 x i16>, i32, i1)
+define i32 @test_sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
+
+declare i32 @llvm.amdgcn.udot2(<2 x i16>, <2 x i16>, i32, i1)
+define i32 @test_udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.udot2(<2 x i16> %arg0, <2 x i16> %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
+
+declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1)
+define i32 @test_sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.sdot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
+
+declare i32 @llvm.amdgcn.udot4(i32, i32, i32, i1)
+define i32 @test_udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg3
+  ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  %val = call i32 @llvm.amdgcn.udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
+  ret i32 %val
+}
diff --git a/test/Verifier/AMDGPU/lit.local.cfg b/test/Verifier/AMDGPU/lit.local.cfg

new file mode 100644 (file)

index 0000000..2a665f0
--- /dev/null
+++ b/test/Verifier/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Verifier/cttz-undef-arg.ll b/test/Verifier/cttz-undef-arg.ll

index 66c5396443acd6daeb8592f84f0032bab43ad4ce..8c982e0924bed6e8025bf246074dcc09c2ae76a4 100644 (file)
--- a/test/Verifier/cttz-undef-arg.ll
+++ b/test/Verifier/cttz-undef-arg.ll
@@ -5,11 +5,13 @@ declare i32 @llvm.cttz.i32(i32, i1)
  
  define void @f(i32 %x, i1 %is_not_zero) {
  entry:
-; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i1 %is_not_zero
  ; CHECK-NEXT: @llvm.ctlz.i32
    call i32 @llvm.ctlz.i32(i32 %x, i1 %is_not_zero)
  
-; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i1 %is_not_zero
  ; CHECK-NEXT: @llvm.cttz.i32
    call i32 @llvm.cttz.i32(i32 %x, i1 %is_not_zero)
    ret void
diff --git a/test/Verifier/element-wise-atomic-memory-intrinsics.ll b/test/Verifier/element-wise-atomic-memory-intrinsics.ll

index 81c8ba16b97d1f7209181bc19247d61804b5744b..f22f8eecbcc0456cb39a3a5f48ca58e194de8c95 100644 (file)
--- a/test/Verifier/element-wise-atomic-memory-intrinsics.ll
+++ b/test/Verifier/element-wise-atomic-memory-intrinsics.ll
@@ -1,8 +1,11 @@
  ; RUN: not opt -verify < %s 2>&1 | FileCheck %s
  
  define void @test_memcpy(i8* %P, i8* %Q, i32 %A, i32 %E) {
-  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %E
+  ; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
+
    ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3)
  
@@ -21,11 +24,15 @@ define void @test_memcpy(i8* %P, i8* %Q, i32 %A, i32 %E) {
  
    ret void
  }
+
  declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
  
  define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) {
-  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %E
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
+
    ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
    call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3)
  
@@ -44,11 +51,15 @@ define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) {
  
    ret void
  }
+
  declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
  
  define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) {
-  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %E
+  ; CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
+
    ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
    call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3)
  
diff --git a/test/Verifier/frameescape.ll b/test/Verifier/frameescape.ll

index c03134159db11c31c0a9c7120c910a4b23767b17..0850b9495d9bbb9552f65a34b7a39385403140e6 100644 (file)
--- a/test/Verifier/frameescape.ll
+++ b/test/Verifier/frameescape.ll
@@ -47,7 +47,10 @@ define internal void @k(i32 %n) {
    call i8* @llvm.localrecover(i8* bitcast(void()* @f to i8*), i8* null, i32 %n)
    ret void
  }
-; CHECK: idx argument of llvm.localrecover must be a constant int
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i32 %n
+; CHECK-NEXT: %1 = call i8* @llvm.localrecover(i8* bitcast (void ()* @f to i8*), i8* null, i32 %n)
  
  define internal void @l(i8* %b) {
    %a = alloca i8
diff --git a/test/Verifier/immarg-param-attribute-invalid.ll b/test/Verifier/immarg-param-attribute-invalid.ll

new file mode 100644 (file)

index 0000000..5b923c5
--- /dev/null
+++ b/test/Verifier/immarg-param-attribute-invalid.ll
@@ -0,0 +1,107 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
+declare void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> immarg)
+declare void @llvm.test.immarg.intrinsic.f32(float immarg)
+declare void @llvm.test.immarg.intrinsic.v2f32(<2 x float> immarg)
+declare void @llvm.test.immarg.intrinsic.2ai32([2 x i32] immarg)
+
+@gv = global i32 undef, align 4
+
+define void @call_llvm.test.immarg.intrinsic.i32(i32 %arg) {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i32 undef
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 undef)
+  call void @llvm.test.immarg.intrinsic.i32(i32 undef)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: i32 %arg
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 %arg)
+  call void @llvm.test.immarg.intrinsic.i32(i32 %arg)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 ptrtoint (i32* @gv to i32)
+  ; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32))
+  call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32))
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.f32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: float undef
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.f32(float undef)
+  call void @llvm.test.immarg.intrinsic.f32(float undef)
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.v2i32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x i32> zeroinitializer
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer)
+  call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> <i32 1, i32 2>)
+  call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> <i32 1, i32 2>)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x i32> undef
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef)
+  call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef)
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.v2f32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x float> zeroinitializer
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer)
+  call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: <2 x float> <float 1.000000e+00, float 2.000000e+00>
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> <float 1.000000e+00, float 2.000000e+00>)
+  call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> <float 1.0, float 2.0>)
+  ret void
+}
+
+define void @call_llvm.test.immarg.intrinsic.2ai32() {
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: [2 x i32] zeroinitializer
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer)
+  call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer)
+
+; CHECK: immarg operand has non-immediate parameter
+; CHECK-NEXT: [2 x i32] [i32 1, i32 2]
+; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2])
+  call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2])
+  ret void
+}
+
+; CHECK: immarg attribute only applies to intrinsics
+; CHECK-NEXT: void (i32)* @not_an_intrinsic
+declare void @not_an_intrinsic(i32 immarg)
+
+declare void @llvm.test.intrinsic(i32)
+declare void @func(i32)
+
+define void @only_on_callsite() {
+; CHECK: immarg attribute only applies to intrinsics
+; CHECK-NEXT: call void @func(i32 immarg 0)
+; CHECK-NEXT: immarg may not apply only to call sites
+; CHECK-NEXT: i32 0
+; CHECK-NEXT: call void @func(i32 immarg 0)
+  call void @func(i32 immarg 0)
+
+; CHECK: immarg may not apply only to call sites
+; CHECK-NEXT: i32 0
+; CHECK-NEXT: call void @llvm.test.intrinsic(i32 immarg 0)
+  call void @llvm.test.intrinsic(i32 immarg 0)
+  ret void
+}
+
+; CHECK: immarg attribute only applies to intrinsics
+; CHECK: void (i32)* @on_function_definition
+define void @on_function_definition(i32 immarg %arg) {
+  ret void
+}
diff --git a/test/Verifier/intrinsic-immarg.ll b/test/Verifier/intrinsic-immarg.ll

new file mode 100644 (file)

index 0000000..4a701dd
--- /dev/null
+++ b/test/Verifier/intrinsic-immarg.ll
@@ -0,0 +1,223 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare i8* @llvm.returnaddress(i32)
+define void @return_address(i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result = call i8* @llvm.returnaddress(i32 %var)
+  %result = call i8* @llvm.returnaddress(i32 %var)
+  ret void
+}
+
+declare i8* @llvm.frameaddress(i32)
+define void @frame_address(i32 %var) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %var
+  ; CHECK-NEXT: %result = call i8* @llvm.frameaddress(i32 %var)
+  %result = call i8* @llvm.frameaddress(i32 %var)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
+define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
+define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
+define void @memset(i8* %dest, i8 %val, i1 %is.volatile) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile)
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile)
+  ret void
+}
+
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
+define void @objectsize(i8* %ptr, i1 %a, i1 %b, i1 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %a
+  ; CHECK-NEXT: %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false)
+  %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %b
+  ; CHECK-NEXT: %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false)
+  %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false)
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %c
+  ; CHECK-NEXT: %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c)
+  %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c)
+  ret void
+}
+
+declare i8 @llvm.expect.i8(i8, i8)
+define i8 @expect(i8 %arg0, i8 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i8 %arg1
+  ; CHECK-NEXT: %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1)
+  %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1)
+  ret i8 %ret
+}
+
+declare i64 @llvm.smul.fix.i64(i64, i64, i32)
+define i64 @smul_fix(i64 %arg0, i64 %arg1, i32 %arg2) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  ret i64 %ret
+}
+
+declare i64 @llvm.umul.fix.i64(i64, i64, i32)
+define i64 @umul_fix(i64 %arg0, i64 %arg1, i32 %arg2) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2)
+  ret i64 %ret
+}
+
+declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
+define <2 x double> @masked_load(<2 x i1> %mask, <2 x double>* %addr, <2 x double> %dst, i32 %align) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %align
+  ; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst)
+  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst)
+  ret <2 x double> %res
+}
+
+declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
+define void @masked_store(<4 x i1> %mask, <4 x i32>* %addr, <4 x i32> %val, i32 %align) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %align
+  ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask)
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask)
+  ret void
+}
+
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
+define <2 x double> @test_gather(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0, i32 %align)  {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %align
+  ; CHECK: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0)
+  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0)
+  ret <2 x double> %res
+}
+
+declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
+define void @test_scatter_8i32(<8 x i32> %a1, <8 x i32*> %ptr, <8 x i1> %mask, i32 %align) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %align
+  ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask)
+  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+define void @test_lifetime_start(i64 %arg0, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg0
+  ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 %arg0, i8* %ptr)
+  call void @llvm.lifetime.start.p0i8(i64 %arg0, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.lifetime.end.p0i8(i64, i8*)
+define void @test_lifetime_end(i64 %arg0, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg0
+  ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 %arg0, i8* %ptr)
+  call void @llvm.lifetime.end.p0i8(i64 %arg0, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.invariant.start.p0i8(i64, i8*)
+define void @test_invariant_start(i64 %arg0, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg0
+  ; CHECK-NEXT: call void @llvm.invariant.start.p0i8(i64 %arg0, i8* %ptr)
+  call void @llvm.invariant.start.p0i8(i64 %arg0, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.invariant.end.p0i8({}*, i64, i8*)
+define void @test_invariant_end({}* %scope, i64 %arg1, i8* %ptr) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg1
+  ; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %scope, i64 %arg1, i8* %ptr)
+  call void @llvm.invariant.end.p0i8({}* %scope, i64 %arg1, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+define void @test_prefetch(i8* %ptr, i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT:  i32 %arg1
+  call void @llvm.prefetch(i8* %ptr, i32 %arg0, i32 0, i32 0)
+  call void @llvm.prefetch(i8* %ptr, i32 0, i32 %arg1, i32 0)
+  ret void
+}
+
+declare void @llvm.localrecover(i8*, i8*, i32)
+define void @test_localrecover(i8* %func, i8* %fp, i32 %idx) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %idx
+  ; CHECK-NEXT: call void @llvm.localrecover(i8* %func, i8* %fp, i32 %idx)
+  call void @llvm.localrecover(i8* %func, i8* %fp, i32 %idx)
+  ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
+define private void @f() {
+  ret void
+}
+
+define void @calls_statepoint(i8 addrspace(1)* %arg0, i64 %arg1, i32 %arg2, i32 %arg4, i32 %arg5) gc "statepoint-example" {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i64 %arg1
+  ; CHECK-NEXT: %safepoint0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 %arg1, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %safepoint1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 %arg2, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg4
+  ; CHECK-NEXT: %safepoint2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 %arg4, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg5
+  ; CHECK-NEXT: %safepoint3 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 %arg5, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %cast = bitcast i8 addrspace(1)* %arg0 to i64 addrspace(1)*
+  %safepoint0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 %arg1, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %safepoint1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 %arg2, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %safepoint2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 %arg4, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  %safepoint3 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 %arg5, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg0, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg0, i8 addrspace(1)* %arg0)
+  ret void
+}
+
+declare void @llvm.hwasan.check.memaccess(i8*, i8*, i32)
+
+define void @hwasan_check_memaccess(i8* %arg0,i8* %arg1, i32 %arg2) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK: i32 %arg2
+  ; CHECK: call void @llvm.hwasan.check.memaccess(i8* %arg0, i8* %arg1, i32 %arg2)
+  call void @llvm.hwasan.check.memaccess(i8* %arg0,i8* %arg1, i32 %arg2)
+  ret void
+}
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h

index 31bdc01959a62229ae34005f2084c9f9c2ce61a4..9dcd136463cc7cc2abd606d1de2a5bda995c8950 100644 (file)
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -136,7 +136,15 @@ struct CodeGenIntrinsic {
    // True if the intrinsic is marked as speculatable.
    bool isSpeculatable;
  
-  enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone };
+  enum ArgAttribute {
+    NoCapture,
+    Returned,
+    ReadOnly,
+    WriteOnly,
+    ReadNone,
+    ImmArg
+  };
+
    std::vector<std::pair<unsigned, ArgAttribute>> ArgumentAttributes;
  
    bool hasProperty(enum SDNP Prop) const {
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp

index 5a9bc60ad295bd1ff6bfb75c87961fbaa466ac59..eb0b6e678e846cfc9cadf8ed0db06de20a65e20d 100644 (file)
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -715,6 +715,9 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
      } else if (Property->isSubClassOf("ReadNone")) {
        unsigned ArgNo = Property->getValueAsInt("ArgNo");
        ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadNone));
+    } else if (Property->isSubClassOf("ImmArg")) {
+      unsigned ArgNo = Property->getValueAsInt("ArgNo");
+      ArgumentAttributes.push_back(std::make_pair(ArgNo, ImmArg));
      } else
        llvm_unreachable("Unknown property!");
    }
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp

index 19c0266cc3869ea110d2c51359d967362b73e2e1..6461ab1c83ad21eaeaf793264645ef5d694f83c7 100644 (file)
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -504,7 +504,6 @@ struct AttributeComparator {
      CodeGenIntrinsic::ModRefBehavior LK = L->ModRef;
      CodeGenIntrinsic::ModRefBehavior RK = R->ModRef;
      if (LK != RK) return (LK > RK);
-
      // Order by argument attributes.
      // This is reliable because each side is already sorted internally.
      return (L->ArgumentAttributes < R->ArgumentAttributes);
@@ -612,6 +611,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
              OS << "Attribute::ReadNone";
              addComma = true;
              break;
+          case CodeGenIntrinsic::ImmArg:
+            if (addComma)
+              OS << ',';
+            OS << "Attribute::ImmArg";
+            addComma = true;
+            break;
            }
  
            ++ai;
diff --git a/utils/emacs/llvm-mode.el b/utils/emacs/llvm-mode.el

index cde66d122866f359366a994da555f56db2eeb557..c4a1b1f02bfa7c08ac5880e78c6020282f2d4880 100644 (file)
--- a/utils/emacs/llvm-mode.el
+++ b/utils/emacs/llvm-mode.el
@@ -27,7 +27,7 @@
           "noduplicate" "noimplicitfloat" "noinline" "nonlazybind" "noredzone" "noreturn"
           "norecurse" "nounwind" "optnone" "optsize" "readnone" "readonly" "returns_twice"
           "speculatable" "ssp" "sspreq" "sspstrong" "safestack" "sanitize_address" "sanitize_hwaddress"
-         "sanitize_thread" "sanitize_memory" "strictfp" "uwtable" "writeonly") 'symbols) . font-lock-constant-face)
+         "sanitize_thread" "sanitize_memory" "strictfp" "uwtable" "writeonly" "immarg") 'symbols) . font-lock-constant-face)
     ;; Variables
     '("%[-a-zA-Z$._][-a-zA-Z$._0-9]*" . font-lock-variable-name-face)
     ;; Labels
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 12 Mar 2019 21:02:54 +0000 (21:02 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 12 Mar 2019 21:02:54 +0000 (21:02 +0000)
docs/LangRef.rst		patch \| blob \| history
docs/ReleaseNotes.rst		patch \| blob \| history
include/llvm/Bitcode/LLVMBitCodes.h		patch \| blob \| history
include/llvm/IR/Attributes.td		patch \| blob \| history
include/llvm/IR/Intrinsics.td		patch \| blob \| history
include/llvm/IR/IntrinsicsAMDGPU.td		patch \| blob \| history
lib/AsmParser/LLLexer.cpp		patch \| blob \| history
lib/AsmParser/LLParser.cpp		patch \| blob \| history
lib/AsmParser/LLToken.h		patch \| blob \| history
lib/Bitcode/Reader/BitcodeReader.cpp		patch \| blob \| history
lib/Bitcode/Writer/BitcodeWriter.cpp		patch \| blob \| history
lib/IR/Attributes.cpp		patch \| blob \| history
lib/IR/Verifier.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
lib/Transforms/InstCombine/InstCombineCalls.cpp		patch \| blob \| history
lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp		patch \| blob \| history
lib/Transforms/Utils/CodeExtractor.cpp		patch \| blob \| history
test/Assembler/auto_upgrade_intrinsics.ll		patch \| blob \| history
test/Assembler/autoupgrade-invalid-mem-intrinsics.ll	[new file with mode: 0644]	patch \| blob
test/Assembler/immarg-param-attribute.ll	[new file with mode: 0644]	patch \| blob
test/Assembler/invalid-immarg.ll	[new file with mode: 0644]	patch \| blob
test/Assembler/invalid-immarg2.ll	[new file with mode: 0644]	patch \| blob
test/Assembler/invalid-immarg3.ll	[new file with mode: 0644]	patch \| blob
test/Bitcode/compatibility.ll		patch \| blob \| history
test/Bitcode/objectsize-upgrade-7.0.ll		patch \| blob \| history
test/Bitcode/upgrade-memory-intrinsics.ll		patch \| blob \| history
test/CodeGen/AMDGPU/bitcast-vector-extract.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll		patch \| blob \| history
test/DebugInfo/MIR/X86/kill-after-spill.mir		patch \| blob \| history
test/LTO/X86/remangle_intrinsics.ll		patch \| blob \| history
test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll		patch \| blob \| history
test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll		patch \| blob \| history
test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll		patch \| blob \| history
test/Transforms/LowerExpectIntrinsic/PR33346.ll		patch \| blob \| history
test/Verifier/AMDGPU/intrinsic-immarg.ll	[new file with mode: 0644]	patch \| blob
test/Verifier/AMDGPU/lit.local.cfg	[new file with mode: 0644]	patch \| blob
test/Verifier/cttz-undef-arg.ll		patch \| blob \| history
test/Verifier/element-wise-atomic-memory-intrinsics.ll		patch \| blob \| history
test/Verifier/frameescape.ll		patch \| blob \| history
test/Verifier/immarg-param-attribute-invalid.ll	[new file with mode: 0644]	patch \| blob
test/Verifier/intrinsic-immarg.ll	[new file with mode: 0644]	patch \| blob
utils/TableGen/CodeGenIntrinsics.h		patch \| blob \| history
utils/TableGen/CodeGenTarget.cpp		patch \| blob \| history
utils/TableGen/IntrinsicEmitter.cpp		patch \| blob \| history
utils/emacs/llvm-mode.el		patch \| blob \| history