From f1bc4aa5fa5b44ded9991dfa0df5d1a9d13ff5b7 Mon Sep 17 00:00:00 2001
From: Clement Courbet
Date: Fri, 27 Oct 2017 12:34:18 +0000
Subject: [PATCH] [CodeGen][ExpandMemCmp][NFC] Simplify load sequence generation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316763 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CodeGenPrepare.cpp |  73 ++++++++----------
 test/CodeGen/X86/memcmp.ll     | 135 ++++++++++++++-------
 2 files changed, 115 insertions(+), 93 deletions(-)

diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 1e5f15397bb..346248ffc06 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1743,7 +1743,6 @@ class MemCmpExpansion {
     const uint64_t Offset;
   };
   SmallVector<LoadEntry, 8> LoadSequence;
-  void computeLoadSequence();

   void createLoadCmpBlocks();
   void createResultBlock();
@@ -1759,18 +1758,13 @@ class MemCmpExpansion {
   Value *getMemCmpEqZeroOneBlock();
   Value *getMemCmpOneBlock();

-  // Computes the decomposition. THis is the common code to compute the number
-  // of loads and the actual load sequence. `callback` is called with each load
-  // size and number of loads for the block size.
-  template <typename CallBackT>
-  void getDecomposition(CallBackT callback) const;
-
 public:
   MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
-                  unsigned NumLoadsPerBlock, const DataLayout &DL);
+                  unsigned MaxNumLoads, unsigned NumLoadsPerBlock,
+                  const DataLayout &DL);

   unsigned getNumBlocks();
-  uint64_t getNumLoads() const { return NumLoads; }
+  uint64_t getNumLoads() const { return LoadSequence.size(); }

   Value *getMemCmpExpansion();
 };
@@ -1787,6 +1781,7 @@ class MemCmpExpansion {
 // LoadCmpBlock finds a difference.
 MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
                                  const unsigned MaxLoadSize,
+                                 const unsigned MaxNumLoads,
                                  const unsigned LoadsPerBlock,
                                  const DataLayout &TheDataLayout)
     : CI(CI),
@@ -1798,27 +1793,34 @@ MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
       IsUsedForZeroCmp(isOnlyUsedInZeroEqualityComparison(CI)),
       DL(TheDataLayout), Builder(CI) {
+  assert(Size > 0 && "zero blocks");
   // Scale the max size down if the target can load more bytes than we need.
   while (this->MaxLoadSize > Size) {
     this->MaxLoadSize /= 2;
   }
-  // Compute the number of loads. At that point we don't want to compute the
-  // actual decomposition because it might be too large to fit in memory.
-  getDecomposition([this](unsigned LoadSize, uint64_t NumLoadsForSize) {
-    NumLoads += NumLoadsForSize;
-  });
-}
-
-template <typename CallBackT>
-void MemCmpExpansion::getDecomposition(CallBackT callback) const {
+  // Compute the decomposition.
   unsigned LoadSize = this->MaxLoadSize;
-  assert(Size > 0 && "zero blocks");
   uint64_t CurSize = Size;
+  uint64_t Offset = 0;
   while (CurSize) {
     assert(LoadSize > 0 && "zero load size");
     const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
+    if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+      // Do not expand if the total number of loads is larger than what the
+      // target allows. Note that it's important that we exit before completing
+      // the expansion to avoid using a ton of memory to store the expansion for
+      // large sizes.
+      LoadSequence.clear();
+      return;
+    }
     if (NumLoadsForThisSize > 0) {
-      callback(LoadSize, NumLoadsForThisSize);
+      for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+        LoadSequence.push_back({LoadSize, Offset});
+        Offset += LoadSize;
+      }
+      if (LoadSize > 1) {
+        ++NumLoadsNonOneByte;
+      }
       CurSize = CurSize % LoadSize;
     }
     // FIXME: This can result in a non-native load size (e.g. X86-32+SSE can
@@ -1827,21 +1829,7 @@ void MemCmpExpansion::getDecomposition(CallBackT callback) const {
     // 4).
     LoadSize /= 2;
   }
-}
-
-void MemCmpExpansion::computeLoadSequence() {
-  uint64_t Offset = 0;
-  getDecomposition(
-      [this, &Offset](unsigned LoadSize, uint64_t NumLoadsForSize) {
-        for (uint64_t I = 0; I < NumLoadsForSize; ++I) {
-          LoadSequence.push_back({LoadSize, Offset});
-          Offset += LoadSize;
-        }
-        if (LoadSize > 1) {
-          ++NumLoadsNonOneByte;
-        }
-      });
-  assert(LoadSequence.size() == getNumLoads() && "mismatch in numbe rof loads");
+  assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
 }

 unsigned MemCmpExpansion::getNumBlocks() {
@@ -2241,7 +2229,6 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
 // This function expands the memcmp call into an inline expansion and returns
 // the memcmp result.
 Value *MemCmpExpansion::getMemCmpExpansion() {
-  computeLoadSequence();
   // A memcmp with zero-comparison with only one block of load and compare does
   // not need to set up any extra blocks. This case could be handled in the DAG,
   // but since we have all of the machinery to flexibly expand any memcpy here,
@@ -2372,17 +2359,23 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   }
   const uint64_t SizeVal = SizeCast->getZExtValue();

+  if (SizeVal == 0) {
+    return false;
+  }
+
   // TTI call to check if target would like to expand memcmp. Also, get the
   // max LoadSize.
   unsigned MaxLoadSize;
   if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return false;

-  MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MemCmpNumLoadsPerBlock,
-                            *DL);
+  const unsigned MaxNumLoads =
+      TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+
+  MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MaxNumLoads,
+                            MemCmpNumLoadsPerBlock, *DL);

   // Don't expand if this will require more loads than desired by the target.
-  if (Expansion.getNumLoads() >
-      TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
+  if (Expansion.getNumLoads() == 0) {
     NumMemCmpGreaterThanMax++;
     return false;
   }
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index 3cf26788123..64e62dae47a 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -13,6 +13,35 @@

 declare i32 @memcmp(i8*, i8*, i64)

+define i32 @length0(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length0:
+; X86:       # BB#0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length0:
+; X64:       # BB#0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind
+  ret i32 %m
+}
+
+define i1 @length0_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length0_eq:
+; X86:       # BB#0:
+; X86-NEXT:    movb $1, %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length0_eq:
+; X64:       # BB#0:
+; X64-NEXT:    movb $1, %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
 define i32 @length2(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length2:
 ; X86:       # BB#0:
@@ -120,14 +149,14 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT:    rolw $8, %dx
 ; X86-NEXT:    rolw $8, %si
 ; X86-NEXT:    cmpw %si, %dx
-; X86-NEXT:    jne .LBB4_1
+; X86-NEXT:    jne .LBB6_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movzbl 2(%eax), %eax
 ; X86-NEXT:    movzbl 2(%ecx), %ecx
 ; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
-; X86-NEXT:  .LBB4_1: # %res_block
+; X86-NEXT:  .LBB6_1: # %res_block
 ; X86-NEXT:    setae %al
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    leal -1(%eax,%eax), %eax
@@ -141,13 +170,13 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT:    rolw $8, %ax
 ; X64-NEXT:    rolw $8, %cx
 ; X64-NEXT:    cmpw %cx, %ax
-; X64-NEXT:    jne .LBB4_1
+; X64-NEXT:    jne .LBB6_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movzbl 2(%rdi), %eax
 ; X64-NEXT:    movzbl 2(%rsi), %ecx
 ; X64-NEXT:    subl %ecx, %eax
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB4_1: # %res_block
+; X64-NEXT:  .LBB6_1: # %res_block
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    leal -1(%rax,%rax), %eax
@@ -163,15 +192,15 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzwl (%eax), %edx
 ; X86-NEXT:    cmpw (%ecx), %dx
-; X86-NEXT:    jne .LBB5_1
+; X86-NEXT:    jne .LBB7_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movb 2(%eax), %dl
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpb 2(%ecx), %dl
-; X86-NEXT:    je .LBB5_3
-; X86-NEXT:  .LBB5_1: # %res_block
+; X86-NEXT:    je .LBB7_3
+; X86-NEXT:  .LBB7_1: # %res_block
 ; X86-NEXT:    movl $1, %eax
-; X86-NEXT:  .LBB5_3: # %endblock
+; X86-NEXT:  .LBB7_3: # %endblock
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -180,15 +209,15 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
 ; X64:       # BB#0: # %loadbb
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    cmpw (%rsi), %ax
-; X64-NEXT:    jne .LBB5_1
+; X64-NEXT:    jne .LBB7_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movb 2(%rdi), %cl
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpb 2(%rsi), %cl
-; X64-NEXT:    je .LBB5_3
-; X64-NEXT:  .LBB5_1: # %res_block
+; X64-NEXT:    je .LBB7_3
+; X64-NEXT:  .LBB7_1: # %res_block
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:  .LBB5_3: # %endblock
+; X64-NEXT:  .LBB7_3: # %endblock
 ; X64-NEXT:    testl %eax, %eax
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
@@ -277,14 +306,14 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    bswapl %esi
 ; X86-NEXT:    cmpl %esi, %edx
-; X86-NEXT:    jne .LBB9_1
+; X86-NEXT:    jne .LBB11_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movzbl 4(%eax), %eax
 ; X86-NEXT:    movzbl 4(%ecx), %ecx
 ; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
-; X86-NEXT:  .LBB9_1: # %res_block
+; X86-NEXT:  .LBB11_1: # %res_block
 ; X86-NEXT:    setae %al
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    leal -1(%eax,%eax), %eax
@@ -298,13 +327,13 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
 ; X64-NEXT:    cmpl %ecx, %eax
-; X64-NEXT:    jne .LBB9_1
+; X64-NEXT:    jne .LBB11_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movzbl 4(%rdi), %eax
 ; X64-NEXT:    movzbl 4(%rsi), %ecx
 ; X64-NEXT:    subl %ecx, %eax
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB9_1: # %res_block
+; X64-NEXT:  .LBB11_1: # %res_block
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    leal -1(%rax,%rax), %eax
@@ -320,15 +349,15 @@ define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %edx
 ; X86-NEXT:    cmpl (%ecx), %edx
-; X86-NEXT:    jne .LBB10_1
+; X86-NEXT:    jne .LBB12_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movb 4(%eax), %dl
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpb 4(%ecx), %dl
-; X86-NEXT:    je .LBB10_3
-; X86-NEXT:  .LBB10_1: # %res_block
+; X86-NEXT:    je .LBB12_3
+; X86-NEXT:  .LBB12_1: # %res_block
 ; X86-NEXT:    movl $1, %eax
-; X86-NEXT:  .LBB10_3: # %endblock
+; X86-NEXT:  .LBB12_3: # %endblock
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -337,15 +366,15 @@ define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
 ; X64:       # BB#0: # %loadbb
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    cmpl (%rsi), %eax
-; X64-NEXT:    jne .LBB10_1
+; X64-NEXT:    jne .LBB12_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movb 4(%rdi), %cl
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpb 4(%rsi), %cl
-; X64-NEXT:    je .LBB10_3
-; X64-NEXT:  .LBB10_1: # %res_block
+; X64-NEXT:    je .LBB12_3
+; X64-NEXT:  .LBB12_1: # %res_block
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:  .LBB10_3: # %endblock
+; X64-NEXT:  .LBB12_3: # %endblock
 ; X64-NEXT:    testl %eax, %eax
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
@@ -365,7 +394,7 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    jne .LBB11_1
+; X86-NEXT:    jne .LBB13_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movl 4(%esi), %ecx
 ; X86-NEXT:    movl 4(%eax), %edx
@@ -373,11 +402,11 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    jne .LBB11_1
+; X86-NEXT:    jne .LBB13_1
 ; X86-NEXT:  # BB#3: # %endblock
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
-; X86-NEXT:  .LBB11_1: # %res_block
+; X86-NEXT:  .LBB13_1: # %res_block
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl %edx, %ecx
 ; X86-NEXT:    setae %al
@@ -407,15 +436,15 @@ define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %edx
 ; X86-NEXT:    cmpl (%ecx), %edx
-; X86-NEXT:    jne .LBB12_1
+; X86-NEXT:    jne .LBB14_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movl 4(%eax), %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl 4(%ecx), %edx
-; X86-NEXT:    je .LBB12_3
-; X86-NEXT:  .LBB12_1: # %res_block
+; X86-NEXT:    je .LBB14_3
+; X86-NEXT:  .LBB14_1: # %res_block
 ; X86-NEXT:    movl $1, %eax
-; X86-NEXT:  .LBB12_3: # %endblock
+; X86-NEXT:  .LBB14_3: # %endblock
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    retl
@@ -436,14 +465,14 @@ define i1 @length8_eq_const(i8* %X) nounwind {
 ; X86:       # BB#0: # %loadbb
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmpl $858927408, (%ecx) # imm = 0x33323130
-; X86-NEXT:    jne .LBB13_1
+; X86-NEXT:    jne .LBB15_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl $926299444, 4(%ecx) # imm = 0x37363534
-; X86-NEXT:    je .LBB13_3
-; X86-NEXT:  .LBB13_1: # %res_block
+; X86-NEXT:    je .LBB15_3
+; X86-NEXT:  .LBB15_1: # %res_block
 ; X86-NEXT:    movl $1, %eax
-; X86-NEXT:  .LBB13_3: # %endblock
+; X86-NEXT:  .LBB15_3: # %endblock
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -476,15 +505,15 @@ define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
 ; X64:       # BB#0: # %loadbb
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    cmpq (%rsi), %rax
-; X64-NEXT:    jne .LBB14_1
+; X64-NEXT:    jne .LBB16_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movl 8(%rdi), %ecx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl 8(%rsi), %ecx
-; X64-NEXT:    je .LBB14_3
-; X64-NEXT:  .LBB14_1: # %res_block
+; X64-NEXT:    je .LBB16_3
+; X64-NEXT:  .LBB16_1: # %res_block
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:  .LBB14_3: # %endblock
+; X64-NEXT:  .LBB16_3: # %endblock
 ; X64-NEXT:    testl %eax, %eax
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
@@ -511,7 +540,7 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB15_1
+; X64-NEXT:    jne .LBB17_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movl 8(%rdi), %ecx
 ; X64-NEXT:    movl 8(%rsi), %edx
@@ -519,10 +548,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT:    bswapl %edx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB15_1
+; X64-NEXT:    jne .LBB17_1
 ; X64-NEXT:  # BB#3: # %endblock
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB15_1: # %res_block
+; X64-NEXT:  .LBB17_1: # %res_block
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
 ; X64-NEXT:    setae %al
@@ -552,7 +581,7 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB16_1
+; X64-NEXT:    jne .LBB18_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movq 8(%rdi), %rcx
 ; X64-NEXT:    movq 8(%rsi), %rdx
@@ -560,10 +589,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB16_1
+; X64-NEXT:    jne .LBB18_1
 ; X64-NEXT:  # BB#3: # %endblock
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB16_1: # %res_block
+; X64-NEXT:  .LBB18_1: # %res_block
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
 ; X64-NEXT:    setae %al
@@ -614,15 +643,15 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
 ; X64:       # BB#0: # %loadbb
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    cmpq (%rsi), %rax
-; X64-NEXT:    jne .LBB17_1
+; X64-NEXT:    jne .LBB19_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movq 8(%rdi), %rcx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq 8(%rsi), %rcx
-; X64-NEXT:    je .LBB17_3
-; X64-NEXT:  .LBB17_1: # %res_block
+; X64-NEXT:    je .LBB19_3
+; X64-NEXT:  .LBB19_1: # %res_block
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:  .LBB17_3: # %endblock
+; X64-NEXT:  .LBB19_3: # %endblock
 ; X64-NEXT:    testl %eax, %eax
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
@@ -670,15 +699,15 @@ define i1 @length16_eq_const(i8* %X) nounwind {
 ; X64:       # BB#0: # %loadbb
 ; X64-NEXT:    movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
 ; X64-NEXT:    cmpq %rax, (%rdi)
-; X64-NEXT:    jne .LBB18_1
+; X64-NEXT:    jne .LBB20_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    movabsq $3833745473465760056, %rcx # imm = 0x3534333231303938
 ; X64-NEXT:    cmpq %rcx, 8(%rdi)
-; X64-NEXT:    je .LBB18_3
-; X64-NEXT:  .LBB18_1: # %res_block
+; X64-NEXT:    je .LBB20_3
+; X64-NEXT:  .LBB20_1: # %res_block
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:  .LBB18_3: # %endblock
+; X64-NEXT:  .LBB20_3: # %endblock
 ; X64-NEXT:    testl %eax, %eax
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
-- 
2.40.0
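
P.S. For readers following the constructor change: the old getDecomposition/
computeLoadSequence pair is now a single greedy pass that covers Size bytes
with loads of decreasing power-of-two sizes and clears the sequence as soon
as MaxNumLoads would be exceeded, which is what lets expandMemCmp treat
getNumLoads() == 0 as "do not expand". The standalone C++ sketch below mirrors
that logic outside of LLVM so it can be compiled and experimented with; the
free function computeLoadSequence, the use of std::vector in place of
SmallVector, and the values in main are assumptions for illustration only,
not code from this patch.

#include <cstdint>
#include <cstdio>
#include <vector>

struct LoadEntry {
  unsigned LoadSize; // Size of this load, in bytes.
  uint64_t Offset;   // Byte offset of this load from the start of the buffers.
};

// Greedily covers `Size` bytes with loads of decreasing power-of-two sizes,
// starting at `MaxLoadSize`. Returns an empty sequence when more than
// `MaxNumLoads` loads would be needed, mirroring the early bail-out in the
// patched constructor.
static std::vector<LoadEntry> computeLoadSequence(uint64_t Size,
                                                  unsigned MaxLoadSize,
                                                  unsigned MaxNumLoads) {
  std::vector<LoadEntry> Seq;
  // Scale the max load size down if it is larger than the total size.
  while (MaxLoadSize > Size)
    MaxLoadSize /= 2;
  uint64_t CurSize = Size;
  uint64_t Offset = 0;
  for (unsigned LoadSize = MaxLoadSize; CurSize != 0; LoadSize /= 2) {
    const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
    if (Seq.size() + NumLoadsForThisSize > MaxNumLoads)
      return {}; // Too many loads: give up before materializing the rest.
    for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
      Seq.push_back({LoadSize, Offset});
      Offset += LoadSize;
    }
    CurSize %= LoadSize; // The remainder is covered by smaller load sizes.
  }
  return Seq;
}

int main() {
  // A 7-byte memcmp with at most 4-byte loads decomposes as 4@0, 2@4, 1@6.
  for (const LoadEntry &E : computeLoadSequence(7, 4, 8))
    std::printf("load %u bytes at offset %llu\n", E.LoadSize,
                (unsigned long long)E.Offset);
  return 0;
}

Note how, as in the patch, the bail-out leaves the sequence empty rather than
partially built, which both bounds memory use for very large sizes and doubles
as the "too many loads" signal checked by the caller.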