PR25416: Improve performance of processing inline assembly consisting of many implicitly-concatenated string literals.

author Richard Smith <richard-llvm@metafoo.co.uk>

Thu, 10 Dec 2015 01:11:47 +0000 (01:11 +0000)

committer Richard Smith <richard-llvm@metafoo.co.uk>

Thu, 10 Dec 2015 01:11:47 +0000 (01:11 +0000)
author Richard Smith <richard-llvm@metafoo.co.uk>
Thu, 10 Dec 2015 01:11:47 +0000 (01:11 +0000)
committer Richard Smith <richard-llvm@metafoo.co.uk>
Thu, 10 Dec 2015 01:11:47 +0000 (01:11 +0000)
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h

index 284bc87b3feee7579d0698b1b48457401ac1f9fc..58fc4e1ead27bbfcf8dc253b5429c7c78aa6e571 100644 (file)
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1631,13 +1631,15 @@ public:
    /// and can have escape sequences in them in addition to the usual trigraph
    /// and escaped newline business.  This routine handles this complexity.
    ///
-  SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
-                                   const LangOptions &Features,
-                                   const TargetInfo &Target) const;
+  SourceLocation
+  getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                    const LangOptions &Features, const TargetInfo &Target,
+                    unsigned *StartToken = nullptr,
+                    unsigned *StartTokenByteOffset = nullptr) const;
  
    typedef const SourceLocation *tokloc_iterator;
    tokloc_iterator tokloc_begin() const { return TokLocs; }
-  tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
+  tokloc_iterator tokloc_end() const { return TokLocs + NumConcatenated; }
  
    SourceLocation getLocStart() const LLVM_READONLY { return TokLocs[0]; }
    SourceLocation getLocEnd() const LLVM_READONLY {
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp

index 9996546a176f9ce4f3fee3cfcb2860ec8f6ed9d0..69b1121f3dc877ab378a3841bc327b3f2236dae1 100644 (file)
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -1007,15 +1007,33 @@ void StringLiteral::setString(const ASTContext &C, StringRef Str,
  /// can have escape sequences in them in addition to the usual trigraph and
  /// escaped newline business.  This routine handles this complexity.
  ///
-SourceLocation StringLiteral::
-getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
-                  const LangOptions &Features, const TargetInfo &Target) const {
+/// If non-null, *StartToken gives the index of the first token to search and
+/// *StartTokenByteOffset gives the byte offset of that token within the
+/// string. Before returning, this function updates *StartToken to the index
+/// of the token that was found and sets *StartTokenByteOffset to the byte
+/// offset of that token in the string.
+/// Passing these two parameters lets a caller that looks up a sequence of
+/// increasing byte positions resume from the previous token, reducing the
+/// total time complexity from O(n^2) to O(n).
+///
+SourceLocation
+StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                                 const LangOptions &Features,
+                                 const TargetInfo &Target, unsigned *StartToken,
+                                 unsigned *StartTokenByteOffset) const {
    assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
           "Only narrow string literals are currently supported");
  
    // Loop over all of the tokens in this string until we find the one that
    // contains the byte we're looking for.
    unsigned TokNo = 0;
+  unsigned StringOffset = 0;
+  if (StartToken)
+    TokNo = *StartToken;
+  if (StartTokenByteOffset) {
+    StringOffset = *StartTokenByteOffset;
+    ByteNo -= StringOffset;
+  }
    while (1) {
      assert(TokNo < getNumConcatenated() && "Invalid byte number!");
      SourceLocation StrTokLoc = getStrTokenLoc(TokNo);
@@ -1024,14 +1042,20 @@ getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
      // the string literal, not the identifier for the macro it is potentially
      // expanded through.
      SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc);
-    
+
      // Re-lex the token to get its length and original spelling.
-    std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc);
+    std::pair<FileID, unsigned> LocInfo =
+        SM.getDecomposedLoc(StrTokSpellingLoc);
      bool Invalid = false;
      StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
-    if (Invalid)
+    if (Invalid) {
+      if (StartTokenByteOffset != nullptr)
+        *StartTokenByteOffset = StringOffset;
+      if (StartToken != nullptr)
+        *StartToken = TokNo;
        return StrTokSpellingLoc;
-    
+    }
+
      const char *StrData = Buffer.data()+LocInfo.second;
      
      // Create a lexer starting at the beginning of this token.
@@ -1047,14 +1071,19 @@ getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
      // If the byte is in this token, return the location of the byte.
      if (ByteNo < TokNumBytes ||
          (ByteNo == TokNumBytes && TokNo == getNumConcatenated() - 1)) {
-      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); 
-      
+      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo);
+
        // Now that we know the offset of the token in the spelling, use the
        // preprocessor to get the offset in the original source.
+      if (StartTokenByteOffset != nullptr)
+        *StartTokenByteOffset = StringOffset;
+      if (StartToken != nullptr)
+        *StartToken = TokNo;
        return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features);
      }
-    
+
      // Move to the next string token.
+    StringOffset += TokNumBytes;
      ++TokNo;
      ByteNo -= TokNumBytes;
    }
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp

index 1cbdeffd7c4f12fd538eb80b64d857be35ef51b8..b0ca80e941411a4d75279d779ab3e52de3a5cfdd 100644 (file)
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -1714,13 +1714,15 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
    if (!StrVal.empty()) {
      const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
      const LangOptions &LangOpts = CGF.CGM.getLangOpts();
+    unsigned StartToken = 0;
+    unsigned ByteOffset = 0;
  
      // Add the location of the start of each subsequent line of the asm to the
      // MDNode.
-    for (unsigned i = 0, e = StrVal.size()-1; i != e; ++i) {
+    for (unsigned i = 0, e = StrVal.size() - 1; i != e; ++i) {
        if (StrVal[i] != '\n') continue;
-      SourceLocation LineLoc = Str->getLocationOfByte(i+1, SM, LangOpts,
-                                                      CGF.getTarget());
+      SourceLocation LineLoc = Str->getLocationOfByte(
+          i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset);
        Locs.push_back(llvm::ConstantAsMetadata::get(
            llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding())));
      }
author	Richard Smith <richard-llvm@metafoo.co.uk>
	Thu, 10 Dec 2015 01:11:47 +0000 (01:11 +0000)
committer	Richard Smith <richard-llvm@metafoo.co.uk>
	Thu, 10 Dec 2015 01:11:47 +0000 (01:11 +0000)
include/clang/AST/Expr.h		patch \| blob \| history
lib/AST/Expr.cpp		patch \| blob \| history
lib/CodeGen/CGStmt.cpp		patch \| blob \| history