// The result vector
Value *VResult = PassThru;
+ // Take a shortcut if the mask is a vector of constants and emit straight-line code.
+ if (isConstantIntVector(Mask)) {
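+ // Expandload reads the enabled elements from consecutive memory locations;
+ // MemIndex is the next location to read and advances only on enabled lanes.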
+ unsigned MemIndex = 0;
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
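+ // A disabled lane keeps the pass-through value already in VResult.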
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
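+ // Load the next consecutive element and insert it into lane Idx; align 1 is
+ // used because no stronger per-element alignment is known here.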
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx));
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
+ ++MemIndex;
+ }
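+ // Every enabled lane has been materialized; replace and erase the intrinsic.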
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+ return;
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
unsigned VectorWidth = VecType->getNumElements();
+ // Take a shortcut if the mask is a vector of constants and emit straight-line code.
+ if (isConstantIntVector(Mask)) {
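+ // Compressstore writes the enabled elements to consecutive memory locations;
+ // MemIndex is the next store slot and advances only on enabled lanes.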
+ unsigned MemIndex = 0;
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
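+ // Extract the enabled element and store it at the next consecutive slot.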
+ Value *OneElt =
+ Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ Builder.CreateAlignedStore(OneElt, NewPtr, 1);
+ ++MemIndex;
+ }
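+ // All enabled lanes have been stored; erase the original intrinsic.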
+ CI->eraseFromParent();
+ return;
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
define void @compressstore_v16f32_const(float* %base, <16 x float> %V) {
; SSE2-LABEL: compressstore_v16f32_const:
-; SSE2: ## %bb.0: ## %cond.store
+; SSE2: ## %bb.0:
; SSE2-NEXT: movss %xmm0, (%rdi)
; SSE2-NEXT: movaps %xmm0, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: retq
;
; SSE42-LABEL: compressstore_v16f32_const:
-; SSE42: ## %bb.0: ## %cond.store
+; SSE42: ## %bb.0:
; SSE42-NEXT: movups %xmm0, (%rdi)
; SSE42-NEXT: movups %xmm1, 16(%rdi)
; SSE42-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; SSE42-NEXT: retq
;
; AVX1-LABEL: compressstore_v16f32_const:
-; AVX1: ## %bb.0: ## %cond.store
+; AVX1: ## %bb.0:
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: compressstore_v16f32_const:
-; AVX2: ## %bb.0: ## %cond.store
+; AVX2: ## %bb.0:
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,2,4]
; AVX2-NEXT: vpermps %ymm1, %ymm0, %ymm0
define <4 x float> @expandload_v4f32_const(float* %base, <4 x float> %src0) {
; SSE2-LABEL: expandload_v4f32_const:
-; SSE2: ## %bb.0: ## %cond.load
+; SSE2: ## %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[0,0]
; SSE2-NEXT: retq
;
; SSE42-LABEL: expandload_v4f32_const:
-; SSE42: ## %bb.0: ## %cond.load
+; SSE42: ## %bb.0:
; SSE42-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE42-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: expandload_v4f32_const:
-; AVX1OR2: ## %bb.0: ## %cond.load
+; AVX1OR2: ## %bb.0:
; AVX1OR2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
define <16 x float> @expandload_v16f32_const(float* %base, <16 x float> %src0) {
; SSE2-LABEL: expandload_v16f32_const:
-; SSE2: ## %bb.0: ## %cond.load
+; SSE2: ## %bb.0:
; SSE2-NEXT: movups (%rdi), %xmm0
; SSE2-NEXT: movups 16(%rdi), %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT: retq
;
; SSE42-LABEL: expandload_v16f32_const:
-; SSE42: ## %bb.0: ## %cond.load
+; SSE42: ## %bb.0:
; SSE42-NEXT: movups (%rdi), %xmm0
; SSE42-NEXT: movups 16(%rdi), %xmm1
; SSE42-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: expandload_v16f32_const:
-; AVX1OR2: ## %bb.0: ## %cond.load
+; AVX1OR2: ## %bb.0:
; AVX1OR2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
define <16 x float> @expandload_v16f32_const_undef(float* %base) {
; SSE2-LABEL: expandload_v16f32_const_undef:
-; SSE2: ## %bb.0: ## %cond.load
+; SSE2: ## %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT: retq
;
; SSE42-LABEL: expandload_v16f32_const_undef:
-; SSE42: ## %bb.0: ## %cond.load
+; SSE42: ## %bb.0:
; SSE42-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE42-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; SSE42-NEXT: movups (%rdi), %xmm0
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: expandload_v16f32_const_undef:
-; AVX1OR2: ## %bb.0: ## %cond.load
+; AVX1OR2: ## %bb.0:
; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX1OR2-NEXT: vinsertf128 $1, 44(%rdi), %ymm0, %ymm1
define <2 x i64> @expandload_v2i64_const(i64* %base, <2 x i64> %src0) {
; SSE2-LABEL: expandload_v2i64_const:
-; SSE2: ## %bb.0: ## %else
+; SSE2: ## %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE42-LABEL: expandload_v2i64_const:
-; SSE42: ## %bb.0: ## %else
+; SSE42: ## %bb.0:
; SSE42-NEXT: pinsrq $1, (%rdi), %xmm0
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: expandload_v2i64_const:
-; AVX1OR2: ## %bb.0: ## %else
+; AVX1OR2: ## %bb.0:
; AVX1OR2-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0
; AVX1OR2-NEXT: retq
;
define <2 x i64> @test5(i64* %base, <2 x i64> %src0) {
; CHECK-LABEL: test5:
-; CHECK: # %bb.0: # %else
+; CHECK: # %bb.0:
; CHECK-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.masked.expandload.v2i64(i64* %base, <2 x i1> <i1 false, i1 true>, <2 x i64> %src0)
define <8 x i8> @foo(<16 x i8> %a) {
; CHECK-LABEL: foo:
-; CHECK: # %bb.0: # %cond.store
+; CHECK: # %bb.0:
; CHECK-NEXT: pextrb $0, %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: pextrb $2, %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: pextrb $4, %xmm0, -{{[0-9]+}}(%rsp)
define void @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %data) {
; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT: br i1 true, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.store:
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT: store i64 [[TMP1]], i64* [[P:%.*]], align 1
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: store i64 [[ELT0]], i64* [[TMP1]], align 1
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT: br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.store1:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT: store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
+; CHECK-NEXT: store i64 [[ELT1]], i64* [[TMP2]], align 1
; CHECK-NEXT: ret void
;
call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 true, i1 true>)
define void @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %data) {
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.store:
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT: store i64 [[TMP1]], i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT: br i1 false, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.store1:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT: store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
; CHECK-NEXT: ret void
;
call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 false>)
define void @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %data) {
; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT: br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.store:
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT: store i64 [[TMP1]], i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT: br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.store1:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT: store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: store i64 [[ELT1]], i64* [[TMP1]], align 1
; CHECK-NEXT: ret void
;
call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 true>)
define <2 x i64> @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT: br i1 true, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.load:
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT: br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.load1:
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[TMP2]], align 1
+; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
%ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret
define <2 x i64> @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.load:
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT: br i1 false, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.load1:
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
;
%ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
ret <2 x i64> %ret
define <2 x i64> @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT: br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.load:
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT: br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.load1:
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
%ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret