]> granicus.if.org Git - clang/commitdiff
[X86] Add custom emission for the avx512 scatter builtins to convert from scalar...
authorCraig Topper <craig.topper@intel.com>
Thu, 17 Jan 2019 00:34:19 +0000 (00:34 +0000)
committerCraig Topper <craig.topper@intel.com>
Thu, 17 Jan 2019 00:34:19 +0000 (00:34 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@351408 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/CGBuiltin.cpp
test/CodeGen/avx512f-builtins.c
test/CodeGen/avx512vl-builtins.c

index 02807bac5f4ce778281f349b6ed5b84600f4ff63..34bf194be2fd8d270f0d1d87e9a26517a0c6c115 100644 (file)
@@ -10190,6 +10190,114 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(Intr, Ops);
   }
 
+  case X86::BI__builtin_ia32_scattersiv8df:
+  case X86::BI__builtin_ia32_scattersiv16sf:
+  case X86::BI__builtin_ia32_scatterdiv8df:
+  case X86::BI__builtin_ia32_scatterdiv16sf:
+  case X86::BI__builtin_ia32_scattersiv8di:
+  case X86::BI__builtin_ia32_scattersiv16si:
+  case X86::BI__builtin_ia32_scatterdiv8di:
+  case X86::BI__builtin_ia32_scatterdiv16si:
+  case X86::BI__builtin_ia32_scatterdiv2df:
+  case X86::BI__builtin_ia32_scatterdiv2di:
+  case X86::BI__builtin_ia32_scatterdiv4df:
+  case X86::BI__builtin_ia32_scatterdiv4di:
+  case X86::BI__builtin_ia32_scatterdiv4sf:
+  case X86::BI__builtin_ia32_scatterdiv4si:
+  case X86::BI__builtin_ia32_scatterdiv8sf:
+  case X86::BI__builtin_ia32_scatterdiv8si:
+  case X86::BI__builtin_ia32_scattersiv2df:
+  case X86::BI__builtin_ia32_scattersiv2di:
+  case X86::BI__builtin_ia32_scattersiv4df:
+  case X86::BI__builtin_ia32_scattersiv4di:
+  case X86::BI__builtin_ia32_scattersiv4sf:
+  case X86::BI__builtin_ia32_scattersiv4si:
+  case X86::BI__builtin_ia32_scattersiv8sf:
+  case X86::BI__builtin_ia32_scattersiv8si: {
+    Intrinsic::ID IID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unexpected builtin");
+    case X86::BI__builtin_ia32_scattersiv8df:
+      IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
+      break;
+    case X86::BI__builtin_ia32_scattersiv16sf:
+      IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv8df:
+      IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv16sf:
+      IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
+      break;
+    case X86::BI__builtin_ia32_scattersiv8di:
+      IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
+      break;
+    case X86::BI__builtin_ia32_scattersiv16si:
+      IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv8di:
+      IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv16si:
+      IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv2df:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv2di:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv4df:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv4di:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv4sf:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv4si:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv8sf:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
+      break;
+    case X86::BI__builtin_ia32_scatterdiv8si:
+      IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
+      break;
+    case X86::BI__builtin_ia32_scattersiv2df:
+      IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
+      break;
+    case X86::BI__builtin_ia32_scattersiv2di:
+      IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
+      break;
+    case X86::BI__builtin_ia32_scattersiv4df:
+      IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
+      break;
+    case X86::BI__builtin_ia32_scattersiv4di:
+      IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
+      break;
+    case X86::BI__builtin_ia32_scattersiv4sf:
+      IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
+      break;
+    case X86::BI__builtin_ia32_scattersiv4si:
+      IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
+      break;
+    case X86::BI__builtin_ia32_scattersiv8sf:
+      IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
+      break;
+    case X86::BI__builtin_ia32_scattersiv8si:
+      IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
+      break;
+    }
+
+    unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(),
+                                Ops[3]->getType()->getVectorNumElements());
+    Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
+    Function *Intr = CGM.getIntrinsic(IID);
+    return Builder.CreateCall(Intr, Ops);
+  }
+
   case X86::BI__builtin_ia32_storehps:
   case X86::BI__builtin_ia32_storelps: {
     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
index 7ded4e3a4da151620d19ef3f4a0ca197ead695b4..fd037c8dab59285cb152cd83acc61f77c18ef4be 100644 (file)
@@ -7098,97 +7098,97 @@ __m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m25
 
 void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
   // CHECK-LABEL: @test_mm512_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatter.qps.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qps.512
   return _mm512_i64scatter_ps(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
   // CHECK-LABEL: @test_mm512_mask_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatter.qps.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qps.512
   return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2); 
 }
 
 void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
   // CHECK-LABEL: @test_mm512_i64scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatter.qpi.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512
   return _mm512_i64scatter_epi32(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
   // CHECK-LABEL: @test_mm512_mask_i64scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatter.qpi.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512
   return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2); 
 }
 
 void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
   // CHECK-LABEL: @test_mm512_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatter.qpd.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512
   return _mm512_i64scatter_pd(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
   // CHECK-LABEL: @test_mm512_mask_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatter.qpd.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512
   return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2); 
 }
 
 void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
   // CHECK-LABEL: @test_mm512_i64scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatter.qpq.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512
   return _mm512_i64scatter_epi64(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
   // CHECK-LABEL: @test_mm512_mask_i64scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatter.qpq.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512
   return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2); 
 }
 
 void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
   // CHECK-LABEL: @test_mm512_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scatter.dps.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dps.512
   return _mm512_i32scatter_ps(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
   // CHECK-LABEL: @test_mm512_mask_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scatter.dps.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dps.512
   return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2); 
 }
 
 void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
   // CHECK-LABEL: @test_mm512_i32scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatter.dpi.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512
   return _mm512_i32scatter_epi32(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
   // CHECK-LABEL: @test_mm512_mask_i32scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatter.dpi.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512
   return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2); 
 }
 
 void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
   // CHECK-LABEL: @test_mm512_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scatter.dpd.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
   return _mm512_i32scatter_pd(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
   // CHECK-LABEL: @test_mm512_mask_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scatter.dpd.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
   return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2); 
 }
 
 void test_mm512_i32scatter_epi64(void *__addr, __m256i __index, __m512i __v1) {
   // CHECK-LABEL: @test_mm512_i32scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatter.dpq.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
   return _mm512_i32scatter_epi64(__addr, __index, __v1, 2); 
 }
 
 void test_mm512_mask_i32scatter_epi64(void *__addr, __mmask8 __mask, __m256i __index, __m512i __v1) {
   // CHECK-LABEL: @test_mm512_mask_i32scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatter.dpq.512
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
   return _mm512_mask_i32scatter_epi64(__addr, __mask, __index, __v1, 2); 
 }
 
index 1890ad47f983040c6ce66153b54d22a56d913145..117b9974f87d95603970ce0142403bc8fb58b652 100644 (file)
@@ -5058,162 +5058,162 @@ __m256 test_mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B) {
 }
 void test_mm_i64scatter_pd(double *__addr, __m128i __index,  __m128d __v1) {
   // CHECK-LABEL: @test_mm_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterdiv2.df
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv2.df
   return _mm_i64scatter_pd(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
   // CHECK-LABEL: @test_mm_mask_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterdiv2.df
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv2.df
   return _mm_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); 
 }
 void test_mm_i64scatter_epi64(long long *__addr, __m128i __index,  __m128i __v1) {
   // CHECK-LABEL: @test_mm_i64scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatterdiv2.di
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv2.di
   return _mm_i64scatter_epi64(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
   // CHECK-LABEL: @test_mm_mask_i64scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatterdiv2.di
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv2.di
   return _mm_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i64scatter_pd(double *__addr, __m256i __index,  __m256d __v1) {
   // CHECK-LABEL: @test_mm256_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterdiv4.df
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.df
   return _mm256_i64scatter_pd(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m256i __index, __m256d __v1) {
   // CHECK-LABEL: @test_mm256_mask_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterdiv4.df
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.df
   return _mm256_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i64scatter_epi64(long long *__addr, __m256i __index,  __m256i __v1) {
   // CHECK-LABEL: @test_mm256_i64scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatterdiv4.di
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.di
   return _mm256_i64scatter_epi64(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask,  __m256i __index, __m256i __v1) {
   // CHECK-LABEL: @test_mm256_mask_i64scatter_epi64
-  // CHECK: @llvm.x86.avx512.scatterdiv4.di
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.di
   return _mm256_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); 
 }
 void test_mm_i64scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
   // CHECK-LABEL: @test_mm_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterdiv4.sf
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.sf
   return _mm_i64scatter_ps(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
   // CHECK-LABEL: @test_mm_mask_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterdiv4.sf
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.sf
   return _mm_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); 
 }
 void test_mm_i64scatter_epi32(int *__addr, __m128i __index,  __m128i __v1) {
   // CHECK-LABEL: @test_mm_i64scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatterdiv4.si
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.si
   return _mm_i64scatter_epi32(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i64scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
   // CHECK-LABEL: @test_mm_mask_i64scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatterdiv4.si
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv4.si
   return _mm_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i64scatter_ps(float *__addr, __m256i __index,  __m128 __v1) {
   // CHECK-LABEL: @test_mm256_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterdiv8.sf
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv8.sf
   return _mm256_i64scatter_ps(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m128 __v1) {
   // CHECK-LABEL: @test_mm256_mask_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterdiv8.sf
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv8.sf
   return _mm256_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i64scatter_epi32(int *__addr, __m256i __index,  __m128i __v1) {
   // CHECK-LABEL: @test_mm256_i64scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatterdiv8.si
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv8.si
   return _mm256_i64scatter_epi32(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i64scatter_epi32(int *__addr, __mmask8 __mask,  __m256i __index, __m128i __v1) {
   // CHECK-LABEL: @test_mm256_mask_i64scatter_epi32
-  // CHECK: @llvm.x86.avx512.scatterdiv8.si
+  // CHECK: @llvm.x86.avx512.mask.scatterdiv8.si
   return _mm256_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); 
 }
 void test_mm_i32scatter_pd(double *__addr, __m128i __index,  __m128d __v1) {
   // CHECK-LABEL: @test_mm_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scattersiv2.df
+  // CHECK: @llvm.x86.avx512.mask.scattersiv2.df
   return _mm_i32scatter_pd(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
   // CHECK-LABEL: @test_mm_mask_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scattersiv2.df
+  // CHECK: @llvm.x86.avx512.mask.scattersiv2.df
   return _mm_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); 
 }
 void test_mm_i32scatter_epi64(long long *__addr, __m128i __index,  __m128i __v1) {
   // CHECK-LABEL: @test_mm_i32scatter_epi64
-  // CHECK: @llvm.x86.avx512.scattersiv2.di
+  // CHECK: @llvm.x86.avx512.mask.scattersiv2.di
   return _mm_i32scatter_epi64(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
   // CHECK-LABEL: @test_mm_mask_i32scatter_epi64
-  // CHECK: @llvm.x86.avx512.scattersiv2.di
+  // CHECK: @llvm.x86.avx512.mask.scattersiv2.di
   return _mm_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i32scatter_pd(double *__addr, __m128i __index,  __m256d __v1) {
   // CHECK-LABEL: @test_mm256_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scattersiv4.df
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.df
   return _mm256_i32scatter_pd(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m256d __v1) {
   // CHECK-LABEL: @test_mm256_mask_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scattersiv4.df
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.df
   return _mm256_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i32scatter_epi64(long long *__addr, __m128i __index,  __m256i __v1) {
   // CHECK-LABEL: @test_mm256_i32scatter_epi64
-  // CHECK: @llvm.x86.avx512.scattersiv4.di
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.di
   return _mm256_i32scatter_epi64(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask,  __m128i __index, __m256i __v1) {
   // CHECK-LABEL: @test_mm256_mask_i32scatter_epi64
-  // CHECK: @llvm.x86.avx512.scattersiv4.di
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.di
   return _mm256_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); 
 }
 void test_mm_i32scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
   // CHECK-LABEL: @test_mm_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scattersiv4.sf
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.sf
   return _mm_i32scatter_ps(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
   // CHECK-LABEL: @test_mm_mask_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scattersiv4.sf
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.sf
   return _mm_mask_i32scatter_ps(__addr,__mask,__index,__v1,2); 
 }
 void test_mm_i32scatter_epi32(int *__addr, __m128i __index,  __m128i __v1) {
   // CHECK-LABEL: @test_mm_i32scatter_epi32
-  // CHECK: @llvm.x86.avx512.scattersiv4.si
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.si
   return _mm_i32scatter_epi32(__addr,__index,__v1,2); 
 }
 void test_mm_mask_i32scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
   // CHECK-LABEL: @test_mm_mask_i32scatter_epi32
-  // CHECK: @llvm.x86.avx512.scattersiv4.si
+  // CHECK: @llvm.x86.avx512.mask.scattersiv4.si
   return _mm_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i32scatter_ps(float *__addr, __m256i __index,  __m256 __v1) {
   // CHECK-LABEL: @test_mm256_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scattersiv8.sf
+  // CHECK: @llvm.x86.avx512.mask.scattersiv8.sf
   return _mm256_i32scatter_ps(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m256 __v1) {
   // CHECK-LABEL: @test_mm256_mask_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scattersiv8.sf
+  // CHECK: @llvm.x86.avx512.mask.scattersiv8.sf
   return _mm256_mask_i32scatter_ps(__addr,__mask,__index,__v1,2); 
 }
 void test_mm256_i32scatter_epi32(int *__addr, __m256i __index,  __m256i __v1) {
   // CHECK-LABEL: @test_mm256_i32scatter_epi32
-  // CHECK: @llvm.x86.avx512.scattersiv8.si
+  // CHECK: @llvm.x86.avx512.mask.scattersiv8.si
   return _mm256_i32scatter_epi32(__addr,__index,__v1,2); 
 }
 void test_mm256_mask_i32scatter_epi32(int *__addr, __mmask8 __mask,  __m256i __index, __m256i __v1) {
   // CHECK-LABEL: @test_mm256_mask_i32scatter_epi32
-  // CHECK: @llvm.x86.avx512.scattersiv8.si
+  // CHECK: @llvm.x86.avx512.mask.scattersiv8.si
   return _mm256_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2); 
 }
 __m128d test_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {