Experimental stab at using relexing to identify preprocessor directives when annotating tokens

author Douglas Gregor <dgregor@apple.com>

Thu, 18 Mar 2010 00:42:48 +0000 (00:42 +0000)

committer Douglas Gregor <dgregor@apple.com>

Thu, 18 Mar 2010 00:42:48 +0000 (00:42 +0000)
author Douglas Gregor <dgregor@apple.com>
Thu, 18 Mar 2010 00:42:48 +0000 (00:42 +0000)
committer Douglas Gregor <dgregor@apple.com>
Thu, 18 Mar 2010 00:42:48 +0000 (00:42 +0000)
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h

index 15afe8f503768a711bdd23114d48cef0b3658350..2042edcdb5217b1d5470f9748a32cc240d34d18b 100644 (file)
--- a/include/clang-c/Index.h
+++ b/include/clang-c/Index.h
@@ -796,7 +796,13 @@ enum CXCursorKind {
  
    CXCursor_IBActionAttr                  = 401,
    CXCursor_IBOutletAttr                  = 402,
-  CXCursor_LastAttr                      = CXCursor_IBOutletAttr
+  CXCursor_LastAttr                      = CXCursor_IBOutletAttr,
+     
+  /* Preprocessing */
+  CXCursor_PreprocessingDirective        = 500,
+  
+  CXCursor_FirstPreprocessing            = CXCursor_PreprocessingDirective,
+  CXCursor_LastPreprocessing             = CXCursor_PreprocessingDirective
  };
  
  /**
@@ -888,6 +894,12 @@ CINDEX_LINKAGE unsigned clang_isInvalid(enum CXCursorKind);
   */
  CINDEX_LINKAGE unsigned clang_isTranslationUnit(enum CXCursorKind);
  
+/***
+ * \brief Determine whether the given cursor represents a preprocessing
+ * element, such as a preprocessor directive or macro instantiation.
+ */
+CINDEX_LINKAGE unsigned clang_isPreprocessing(enum CXCursorKind);
+  
  /***
   * \brief Determine whether the given cursor represents a currently
   *  unexposed piece of the AST (e.g., CXCursor_UnexposedStmt).
diff --git a/test/Index/annotate-tokens-pp.c b/test/Index/annotate-tokens-pp.c

new file mode 100644 (file)

index 0000000..32481fe
--- /dev/null
+++ b/test/Index/annotate-tokens-pp.c
@@ -0,0 +1,31 @@
+#define BAR baz
+#define WIBBLE(X, Y)
+WIBBLE(int, float)
+int BAR;
+#include "foo.h"
+
+// RUN: c-index-test -test-annotate-tokens=%s:1:1:6:1 -I%S/Inputs %s | FileCheck %s
+// CHECK: Punctuation: "#" [1:1 - 1:2] preprocessing directive=
+// CHECK: Identifier: "define" [1:2 - 1:8] preprocessing directive=
+// CHECK: Identifier: "BAR" [1:9 - 1:12] preprocessing directive=
+// CHECK: Identifier: "baz" [1:13 - 1:16] preprocessing directive=
+// CHECK: Punctuation: "#" [2:1 - 2:2] preprocessing directive=
+// CHECK: Identifier: "define" [2:2 - 2:8] preprocessing directive=
+// CHECK: Identifier: "WIBBLE" [2:9 - 2:15] preprocessing directive=
+// CHECK: Punctuation: "(" [2:15 - 2:16] preprocessing directive=
+// CHECK: Identifier: "X" [2:16 - 2:17] preprocessing directive=
+// CHECK: Punctuation: "," [2:17 - 2:18] preprocessing directive=
+// CHECK: Identifier: "Y" [2:19 - 2:20] preprocessing directive=
+// CHECK: Punctuation: ")" [2:20 - 2:21] preprocessing directive=
+// CHECK: Identifier: "WIBBLE" [3:1 - 3:7]
+// CHECK: Punctuation: "(" [3:7 - 3:8]
+// CHECK: Keyword: "int" [3:8 - 3:11]
+// CHECK: Punctuation: "," [3:11 - 3:12]
+// CHECK: Keyword: "float" [3:13 - 3:18]
+// CHECK: Punctuation: ")" [3:18 - 3:19]
+// CHECK: Keyword: "int" [4:1 - 4:4]
+// CHECK: Identifier: "BAR" [4:5 - 4:8]
+// CHECK: Punctuation: ";" [4:8 - 4:9]
+// CHECK: Punctuation: "#" [5:1 - 5:2] preprocessing directive=
+// CHECK: Identifier: "include" [5:2 - 5:9] preprocessing directive=
+// CHECK: Literal: ""foo.h"" [5:10 - 5:17] preprocessing directive=
diff --git a/tools/CIndex/CIndex.cpp b/tools/CIndex/CIndex.cpp

index db27ffe68a46c162c0741ba6d87b09d251eddf99..61bfdba59090a1139ffec1d7b38915cb07a69391 100644 (file)
--- a/tools/CIndex/CIndex.cpp
+++ b/tools/CIndex/CIndex.cpp
@@ -1518,8 +1518,10 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
        return createCXString("UnexposedAttr");
    case CXCursor_IBActionAttr:
        return createCXString("attribute(ibaction)");
-    case CXCursor_IBOutletAttr:
-      return createCXString("attribute(iboutlet)");
+  case CXCursor_IBOutletAttr:
+     return createCXString("attribute(iboutlet)");
+  case CXCursor_PreprocessingDirective:
+    return createCXString("preprocessing directive");
    }
  
    llvm_unreachable("Unhandled CXCursorKind");
@@ -1590,6 +1592,10 @@ unsigned clang_isTranslationUnit(enum CXCursorKind K) {
    return K == CXCursor_TranslationUnit;
  }
  
+// Returns non-zero when K falls inside the inclusive range of preprocessing
+// cursor kinds, [CXCursor_FirstPreprocessing, CXCursor_LastPreprocessing].
+unsigned clang_isPreprocessing(enum CXCursorKind K) {
+  return K >= CXCursor_FirstPreprocessing && K <= CXCursor_LastPreprocessing;
+}
+  
  unsigned clang_isUnexposed(enum CXCursorKind K) {
    switch (K) {
      case CXCursor_UnexposedDecl:
@@ -1642,6 +1648,11 @@ CXSourceLocation clang_getCursorLocation(CXCursor C) {
      return cxloc::translateSourceLocation(getCursorContext(C),
                                     getLocationFromExpr(getCursorExpr(C)));
  
+  if (C.kind == CXCursor_PreprocessingDirective) {
+    SourceLocation L = cxcursor::getCursorPreprocessingDirective(C).getBegin();
+    return cxloc::translateSourceLocation(getCursorContext(C), L);
+  }
+  
    if (!getCursorDecl(C))
      return clang_getNullLocation();
  
@@ -1693,6 +1704,11 @@ CXSourceRange clang_getCursorExtent(CXCursor C) {
      return cxloc::translateSourceRange(getCursorContext(C),
                                  getCursorStmt(C)->getSourceRange());
  
+  if (C.kind == CXCursor_PreprocessingDirective) {
+    SourceRange R = cxcursor::getCursorPreprocessingDirective(C);
+    return cxloc::translateSourceRange(getCursorContext(C), R);
+  }
+  
    if (!getCursorDecl(C))
      return clang_getNullRange();
  
@@ -2216,7 +2232,8 @@ void clang_annotateTokens(CXTranslationUnit TU,
  
    ASTUnit::ConcurrencyCheck Check(*CXXUnit);
  
-  // Annotate all of the source locations in the region of interest that map
+  // Annotate all of the source locations in the region of interest that map to
+  // a specific cursor.
    SourceRange RegionOfInterest;
    RegionOfInterest.setBegin(
          cxloc::translateSourceLocation(clang_getTokenLocation(TU, Tokens[0])));
@@ -2224,23 +2241,114 @@ void clang_annotateTokens(CXTranslationUnit TU,
      = cxloc::translateSourceLocation(clang_getTokenLocation(TU,
                                                       Tokens[NumTokens - 1]));
    RegionOfInterest.setEnd(CXXUnit->getPreprocessor().getLocForEndOfToken(End));
-  // FIXME: Would be great to have a "hint" cursor, then walk from that
-  // hint cursor upward until we find a cursor whose source range encloses
-  // the region of interest, rather than starting from the translation unit.
+  
    AnnotateTokensData Annotated;
    CXCursor Parent = clang_getTranslationUnitCursor(CXXUnit);
    CursorVisitor AnnotateVis(CXXUnit, AnnotateTokensVisitor, &Annotated,
                              Decl::MaxPCHLevel, RegionOfInterest);
    AnnotateVis.VisitChildren(Parent);
  
+  // Look for macro instantiations and preprocessing directives in the 
+  // source range containing the annotated tokens. We do this by re-lexing the
+  // tokens in the source range.
+  SourceManager &SourceMgr = CXXUnit->getSourceManager();
+  std::pair<FileID, unsigned> BeginLocInfo
+    = SourceMgr.getDecomposedLoc(RegionOfInterest.getBegin());
+  std::pair<FileID, unsigned> EndLocInfo
+    = SourceMgr.getDecomposedLoc(RegionOfInterest.getEnd());
+  
+  bool RelexOkay = true;
+  
+  // Cannot re-tokenize across files.
+  if (BeginLocInfo.first != EndLocInfo.first)
+    RelexOkay = false;
+  
+  llvm::StringRef Buffer;
+  if (RelexOkay) {
+    // Create a lexer
+    bool Invalid = false;
+    Buffer = SourceMgr.getBufferData(BeginLocInfo.first, &Invalid);
+    if (Invalid)
+      RelexOkay = false;
+  }
+    
+  if (RelexOkay) {
+    Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first),
+              CXXUnit->getASTContext().getLangOptions(),
+              Buffer.begin(), Buffer.data() + BeginLocInfo.second, Buffer.end());
+    Lex.SetCommentRetentionState(true);
+    
+    // Lex tokens in raw mode until we hit the end of the range, to avoid 
+    // entering #includes or expanding macros.
+    std::vector<Token> TokenStream;
+    const char *EffectiveBufferEnd = Buffer.data() + EndLocInfo.second;
+    Preprocessor &PP = CXXUnit->getPreprocessor();
+    while (Lex.getBufferLocation() <= EffectiveBufferEnd) {
+      Token Tok;
+      Lex.LexFromRawLexer(Tok);
+      
+    reprocess:
+      if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
+        // We have found a preprocessing directive. Gobble it up so that we
+        // don't see it while preprocessing these tokens later, but keep track of
+        // all of the token locations inside this preprocessing directive so that
+        // we can annotate them appropriately.
+        //
+        // FIXME: Some simple tests here could identify macro definitions and
+        // #undefs, to provide specific cursor kinds for those.
+        std::vector<SourceLocation> Locations;
+        do {
+          Locations.push_back(Tok.getLocation());
+          Lex.LexFromRawLexer(Tok);        
+        } while (!Tok.isAtStartOfLine() && !Tok.is(tok::eof));
+        
+        using namespace cxcursor;
+        CXCursor Cursor
+          = MakePreprocessingDirectiveCursor(SourceRange(Locations.front(),
+                                                         Locations.back()),
+                                             CXXUnit);
+        for (unsigned I = 0, N = Locations.size(); I != N; ++I) {
+          Annotated[Locations[I].getRawEncoding()] = Cursor;
+        }
+        
+        if (Tok.is(tok::eof))
+          break;
+        
+        if (Tok.isAtStartOfLine())
+          goto reprocess;
+        
+        continue;
+      }
+      
+      // If this is a ## token, change its kind to unknown so that repreprocessing
+      // it will not produce an error.
+      if (Tok.is(tok::hashhash))
+        Tok.setKind(tok::unknown);
+      
+      // If this raw token is an identifier, the raw lexer won't have looked up
+      // the corresponding identifier info for it.  Do this now so that it will be
+      // macro expanded when we re-preprocess it.
+      if (Tok.is(tok::identifier)) {
+        // Change the kind of this identifier to the appropriate token kind, e.g.
+        // turning "for" into a keyword.
+        Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID());
+      }
+      
+      TokenStream.push_back(Tok);
+      
+      if (Tok.is(tok::eof)) 
+        break;
+    }
+  }
+  
    for (unsigned I = 0; I != NumTokens; ++I) {
      // Determine whether we saw a cursor at this token's location.
      AnnotateTokensData::iterator Pos = Annotated.find(Tokens[I].int_data[1]);
      if (Pos == Annotated.end())
        continue;
-
+    
      Cursors[I] = Pos->second;
-  }
+  }  
  }
  
  void clang_disposeTokens(CXTranslationUnit TU,
diff --git a/tools/CIndex/CIndex.exports b/tools/CIndex/CIndex.exports

index fe0396d2b1405824ca23cbae4edacf25735ecf2e..d036e5cfd8f2d59434681499d98b3d8507506a94 100644 (file)
--- a/tools/CIndex/CIndex.exports
+++ b/tools/CIndex/CIndex.exports
@@ -70,6 +70,7 @@ _clang_isCursorDefinition
  _clang_isDeclaration
  _clang_isExpression
  _clang_isInvalid
+_clang_isPreprocessing
  _clang_isReference
  _clang_isStatement
  _clang_isTranslationUnit
diff --git a/tools/CIndex/CXCursor.cpp b/tools/CIndex/CXCursor.cpp

index 0fa73a513da4d8e5464a58234d08318ef9138959..f2294b0996fb73eac59d8d78abb133eec0c6af35 100644 (file)
--- a/tools/CIndex/CXCursor.cpp
+++ b/tools/CIndex/CXCursor.cpp
@@ -296,6 +296,24 @@ cxcursor::getCursorTypeRef(CXCursor C) {
                                        reinterpret_cast<uintptr_t>(C.data[1])));
  }
  
+// Builds a cursor of kind CXCursor_PreprocessingDirective covering Range.
+// The cursor carries no AST node: the raw encodings of the range's begin and
+// end locations are stored in the first two data slots (cast to void *, they
+// are not real pointers), and TU is stored in the third slot.
+CXCursor cxcursor::MakePreprocessingDirectiveCursor(SourceRange Range, 
+                                                    ASTUnit *TU) {
+  CXCursor C = { CXCursor_PreprocessingDirective, 
+                 { reinterpret_cast<void *>(Range.getBegin().getRawEncoding()),
+                   reinterpret_cast<void *>(Range.getEnd().getRawEncoding()),
+                   TU }
+               };
+  return C;
+}
+
+// Unpacks the source range held by a preprocessing-directive cursor.
+// Asserts that C has kind CXCursor_PreprocessingDirective, then rebuilds the
+// begin location from the raw encoding in C.data[0] and the end location from
+// the raw encoding in C.data[1].
+SourceRange cxcursor::getCursorPreprocessingDirective(CXCursor C) {
+  assert(C.kind == CXCursor_PreprocessingDirective);
+  return SourceRange(SourceLocation::getFromRawEncoding(
+                                      reinterpret_cast<uintptr_t> (C.data[0])),
+                     SourceLocation::getFromRawEncoding(
+                                      reinterpret_cast<uintptr_t> (C.data[1])));
+}
+
  // Returns the cursor's first data slot cast to a Decl pointer. No check of
  // the cursor kind is made in this function.
  Decl *cxcursor::getCursorDecl(CXCursor Cursor) {
    return (Decl *)Cursor.data[0];
  }
diff --git a/tools/CIndex/CXCursor.h b/tools/CIndex/CXCursor.h

index 934d5e2aebd39ed32202f6052d7ea17585811d01..aa5d4f3a39832855ecd7363fb8b231c0c96ac2a9 100644 (file)
--- a/tools/CIndex/CXCursor.h
+++ b/tools/CIndex/CXCursor.h
@@ -73,6 +73,12 @@ CXCursor MakeCursorTypeRef(TypeDecl *Type, SourceLocation Loc, ASTUnit *TU);
  /// and optionally the location where the reference occurred.
  std::pair<TypeDecl *, SourceLocation> getCursorTypeRef(CXCursor C);
  
+/// \brief Create a preprocessing directive cursor.
+CXCursor MakePreprocessingDirectiveCursor(SourceRange Range, ASTUnit *TU);
+
+/// \brief Unpack a given preprocessing directive to retrieve its source range.
+SourceRange getCursorPreprocessingDirective(CXCursor C);
+  
  Decl *getCursorDecl(CXCursor Cursor);
  Expr *getCursorExpr(CXCursor Cursor);
  Stmt *getCursorStmt(CXCursor Cursor);
author	Douglas Gregor <dgregor@apple.com>
	Thu, 18 Mar 2010 00:42:48 +0000 (00:42 +0000)
committer	Douglas Gregor <dgregor@apple.com>
	Thu, 18 Mar 2010 00:42:48 +0000 (00:42 +0000)
include/clang-c/Index.h		patch \| blob \| history
test/Index/annotate-tokens-pp.c	[new file with mode: 0644]	patch \| blob
tools/CIndex/CIndex.cpp		patch \| blob \| history
tools/CIndex/CIndex.exports		patch \| blob \| history
tools/CIndex/CXCursor.cpp		patch \| blob \| history
tools/CIndex/CXCursor.h		patch \| blob \| history