unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
Builder.SetInsertPoint(LoadCmpBlocks[Index]);
-
+ Value *Cmp = nullptr;
for (unsigned i = 0; i < NumLoads; ++i) {
unsigned LoadSize = getLoadSize(RemainingBytes);
unsigned GEPIndex = NumBytesProcessed / LoadSize;
LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
}
- Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
- Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
- XorList.push_back(Diff);
+ if (NumLoads != 1) {
+ // If we have multiple loads per block, we need to generate a composite
+ // comparison using xor+or.
+ Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+ Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
+ XorList.push_back(Diff);
+ } else {
+ // If there's only one load per block, we just compare the loaded values.
+ Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ }
}
auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
return OutList;
};
- // Pairwise OR the XOR results.
- OrList = pairWiseOr(XorList);
+ if (!Cmp) {
+ // Pairwise OR the XOR results.
+ OrList = pairWiseOr(XorList);
- // Pairwise OR the OR results until one result left.
- while (OrList.size() != 1) {
- OrList = pairWiseOr(OrList);
+ // Pairwise OR the OR results until one result is left.
+ while (OrList.size() != 1) {
+ OrList = pairWiseOr(OrList);
+ }
+ Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
}
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0],
- ConstantInt::get(Diff->getType(), 0));
BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
? EndBlock
: LoadCmpBlocks[Index + 1];
; CHECK-NEXT: lwz 3, 0(3)
; CHECK-NEXT: lwz 4, 0(4)
; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: xor 3, 3, 4
-; CHECK-NEXT: cmplwi 3, 0
+; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: isel 3, 0, 5, 2
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
; CHECK: # BB#0: # %loadbb
; CHECK-NEXT: ld 5, 0(3)
; CHECK-NEXT: ld 6, 0(4)
-; CHECK-NEXT: xor. 5, 5, 6
+; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB1_2
; CHECK-NEXT: # BB#1: # %loadbb1
; CHECK-NEXT: ld 3, 8(3)
; CHECK-NEXT: ld 4, 8(4)
-; CHECK-NEXT: xor. 3, 3, 4
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, 0
; CHECK-NEXT: beq 0, .LBB1_3
; CHECK-NEXT: .LBB1_2: # %res_block
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB1_3:
-; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: .LBB1_3: # %endblock
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16)
; CHECK: # BB#0: # %loadbb
; CHECK-NEXT: lwz 5, 0(3)
; CHECK-NEXT: lwz 6, 0(4)
-; CHECK-NEXT: xor 5, 5, 6
-; CHECK-NEXT: cmplwi 5, 0
+; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB2_3
; CHECK-NEXT: # BB#1: # %loadbb1
; CHECK-NEXT: lhz 5, 4(3)
; CHECK-NEXT: lhz 6, 4(4)
-; CHECK-NEXT: xor 5, 5, 6
-; CHECK-NEXT: rlwinm. 5, 5, 0, 16, 31
+; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB2_3
; CHECK-NEXT: # BB#2: # %loadbb2
; CHECK-NEXT: lbz 3, 6(3)
; CHECK-NEXT: lbz 4, 6(4)
-; CHECK-NEXT: xor 3, 3, 4
-; CHECK-NEXT: rlwinm. 3, 3, 0, 24, 31
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, 0
; CHECK-NEXT: beq 0, .LBB2_4
; CHECK-NEXT: .LBB2_3: # %res_block
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: .LBB2_4: # %endblock
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7)
; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha
; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l(3)
; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l(4)
-; CHECK-NEXT: xor. 3, 3, 4
+; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: bne 0, .LBB5_2
; CHECK-NEXT: # BB#1: # %loadbb1
; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8
; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3)
; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4)
-; CHECK-NEXT: xor. 3, 3, 4
-; CHECK-NEXT: beq 0, .LBB5_4
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: beq 0, .LBB5_3
; CHECK-NEXT: .LBB5_2: # %res_block
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: .LBB5_3: # %endblock
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB5_4:
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: b .LBB5_3
%call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
%not.tobool = icmp eq i32 %call, 0
%cond = zext i1 %not.tobool to i32