From: Douglas Gregor Date: Tue, 26 Jan 2010 18:31:56 +0000 (+0000) Subject: Implement clang_annotateTokens(), which associates cursors with each X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0045e9fe1f7dfc37f1ea7bdb9b70bcdb6700f0c0;p=clang Implement clang_annotateTokens(), which associates cursors with each of the tokens within a raw token stream. This does not even attempt to handle macros yet. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@94561 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h index ff0a0e1f09..b041dcfd67 100644 --- a/include/clang-c/Index.h +++ b/include/clang-c/Index.h @@ -861,7 +861,11 @@ CINDEX_LINKAGE unsigned clang_isCursorDefinition(CXCursor); */ /** - * \defgroup CINDEX_LEX Lexing and syntactic analysis + * \defgroup CINDEX_LEX Token extraction and manipulation + * + * The routines in this group provide access to the tokens within a + * translation unit, along with a semantic mapping of those tokens to + * their corresponding cursors. * * @{ */ @@ -876,7 +880,7 @@ typedef enum CXTokenKind { CXToken_Punctuation, /** - * \brief A a language keyword. + * \brief A language keyword. */ CXToken_Keyword, @@ -952,9 +956,22 @@ CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, * \brief Annotate the given set of tokens by providing cursors for each token * that can be mapped to a specific entity within the abstract syntax tree. * - * This token-annotation routine is equivalent to invoking clang_getCursor() - * for the source locations of each of the tokens, then accepting only those - * cursors that refer to a specific token. + * This token-annotation routine is equivalent to invoking + * clang_getCursor() for the source locations of each of the + * tokens. The cursors provided are filtered, so that only those + * cursors that have a direct correspondence to the token are + * accepted. 
For example, given a function call \c f(x), + * clang_getCursor() would provide the following cursors: + * + * * when the cursor is over the 'f', a DeclRefExpr cursor referring to 'f'. + * * when the cursor is over the '(' or the ')', a CallExpr referring to 'f'. + * * when the cursor is over the 'x', a DeclRefExpr cursor referring to 'x'. + * + * Only the first and last of these cursors will occur within the + * annotations, since the tokens "f" and "x" directly refer to a function + * and a variable, respectively, but the parentheses are just a small + * part of the full syntax of the function call expression, which is + * not provided as an annotation. * * \param TU the translation unit that owns the given tokens. * diff --git a/test/Index/annotate-tokens.c b/test/Index/annotate-tokens.c index 6d2b4d24f0..b1af2ff14e 100644 --- a/test/Index/annotate-tokens.c +++ b/test/Index/annotate-tokens.c @@ -10,32 +10,32 @@ void f(void *ptr) { } // RUN: c-index-test -test-annotate-tokens=%s:4:1:9:32 %s | FileCheck %s -// CHECK: Identifier: "T" [4:3 - 4:3] +// CHECK: Identifier: "T" [4:3 - 4:3] TypeRef=T:1:13 // CHECK: Punctuation: "*" [4:4 - 4:4] -// CHECK: Identifier: "t_ptr" [4:6 - 4:10] +// CHECK: Identifier: "t_ptr" [4:6 - 4:10] VarDecl=t_ptr:4:6 (Definition) // CHECK: Punctuation: "=" [4:12 - 4:12] // CHECK: Punctuation: "(" [4:14 - 4:14] -// CHECK: Identifier: "T" [4:15 - 4:15] +// CHECK: Identifier: "T" [4:15 - 4:15] TypeRef=T:1:13 // CHECK: Punctuation: "*" [4:17 - 4:17] // CHECK: Punctuation: ")" [4:18 - 4:18] -// CHECK: Identifier: "ptr" [4:19 - 4:21] +// CHECK: Identifier: "ptr" [4:19 - 4:21] DeclRefExpr=ptr:3:14 // CHECK: Punctuation: ";" [4:22 - 4:22] // CHECK: Punctuation: "(" [5:3 - 5:3] // CHECK: Keyword: "void" [5:4 - 5:7] // CHECK: Punctuation: ")" [5:8 - 5:8] // CHECK: Keyword: "sizeof" [5:9 - 5:14] // CHECK: Punctuation: "(" [5:15 - 5:15] -// CHECK: Identifier: "T" [5:16 - 5:16] +// CHECK: Identifier: "T" [5:16 - 5:16] TypeRef=T:1:13 // CHECK: Punctuation: 
")" [5:17 - 5:17] // CHECK: Punctuation: ";" [5:18 - 5:18] // CHECK: Comment: "/* A comment */" [6:3 - 6:17] // CHECK: Keyword: "struct" [7:3 - 7:8] -// CHECK: Identifier: "X" [7:10 - 7:10] -// CHECK: Identifier: "x" [7:12 - 7:12] +// CHECK: Identifier: "X" [7:10 - 7:10] TypeRef=struct X:2:8 +// CHECK: Identifier: "x" [7:12 - 7:12] VarDecl=x:7:12 (Definition) // CHECK: Punctuation: "=" [7:14 - 7:14] // CHECK: Punctuation: "(" [7:16 - 7:16] // CHECK: Keyword: "struct" [7:17 - 7:22] -// CHECK: Identifier: "X" [7:24 - 7:24] +// CHECK: Identifier: "X" [7:24 - 7:24] TypeRef=struct X:2:8 // CHECK: Punctuation: ")" [7:25 - 7:25] // CHECK: Punctuation: "{" [7:26 - 7:26] // CHECK: Literal: "1" [7:27 - 7:27] @@ -45,18 +45,18 @@ void f(void *ptr) { // CHECK: Punctuation: ";" [7:32 - 7:32] // CHECK: Keyword: "void" [8:3 - 8:6] // CHECK: Punctuation: "*" [8:8 - 8:8] -// CHECK: Identifier: "xx" [8:9 - 8:10] +// CHECK: Identifier: "xx" [8:9 - 8:10] VarDecl=xx:8:9 (Definition) // CHECK: Punctuation: "=" [8:12 - 8:12] -// CHECK: Identifier: "ptr" [8:14 - 8:16] +// CHECK: Identifier: "ptr" [8:14 - 8:16] DeclRefExpr=ptr:3:14 // CHECK: Punctuation: "?" 
[8:18 - 8:18] // CHECK: Punctuation: ":" [8:20 - 8:20] // CHECK: Punctuation: "&" [8:22 - 8:22] -// CHECK: Identifier: "x" [8:23 - 8:23] +// CHECK: Identifier: "x" [8:23 - 8:23] DeclRefExpr=x:7:12 // CHECK: Punctuation: ";" [8:24 - 8:24] // CHECK: Keyword: "const" [9:3 - 9:7] // CHECK: Keyword: "char" [9:9 - 9:12] // CHECK: Punctuation: "*" [9:14 - 9:14] -// CHECK: Identifier: "hello" [9:16 - 9:20] +// CHECK: Identifier: "hello" [9:16 - 9:20] VarDecl=hello:9:16 (Definition) // CHECK: Punctuation: "=" [9:22 - 9:22] // CHECK: Literal: ""Hello"" [9:24 - 9:30] // CHECK: Punctuation: ";" [9:31 - 9:31] diff --git a/tools/CIndex/CIndex.cpp b/tools/CIndex/CIndex.cpp index 55061cba71..3a5977970a 100644 --- a/tools/CIndex/CIndex.cpp +++ b/tools/CIndex/CIndex.cpp @@ -2056,19 +2056,86 @@ void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, memmove(*Tokens, CXTokens.data(), sizeof(CXToken) * CXTokens.size()); *NumTokens = CXTokens.size(); } + +typedef llvm::DenseMap AnnotateTokensData; + +enum CXChildVisitResult AnnotateTokensVisitor(CXCursor cursor, + CXCursor parent, + CXClientData client_data) { + AnnotateTokensData *Data = static_cast(client_data); + + // We only annotate the locations of declarations, simple + // references, and expressions which directly reference something. 
+ CXCursorKind Kind = clang_getCursorKind(cursor); + if (clang_isDeclaration(Kind) || clang_isReference(Kind)) { + // Okay: We can annotate the location of this declaration with the + // declaration or reference + } else if (clang_isExpression(cursor.kind)) { + if (Kind != CXCursor_DeclRefExpr && + Kind != CXCursor_MemberRefExpr && + Kind != CXCursor_ObjCMessageExpr) + return CXChildVisit_Recurse; + + CXCursor Referenced = clang_getCursorReferenced(cursor); + if (Referenced == cursor || Referenced == clang_getNullCursor()) + return CXChildVisit_Recurse; + + // Okay: we can annotate the location of this expression + } else { + // Nothing to annotate + return CXChildVisit_Recurse; + } + CXSourceLocation Loc = clang_getCursorLocation(cursor); + (*Data)[Loc.int_data] = cursor; + return CXChildVisit_Recurse; +} + void clang_annotateTokens(CXTranslationUnit TU, CXToken *Tokens, unsigned NumTokens, CXCursor *Cursors) { - // FIXME: Actually perform some meaningful lookup here. + if (NumTokens == 0) + return; + + // Any token we don't specifically annotate will have a NULL cursor. for (unsigned I = 0; I != NumTokens; ++I) Cursors[I] = clang_getNullCursor(); + + ASTUnit *CXXUnit = static_cast(TU); + if (!CXXUnit || !Tokens) + return; + + // Annotate all of the source locations in the region of interest that map + SourceRange RegionOfInterest; + RegionOfInterest.setBegin( + cxloc::translateSourceLocation(clang_getTokenLocation(TU, Tokens[0]))); + SourceLocation End + = cxloc::translateSourceLocation(clang_getTokenLocation(TU, + Tokens[NumTokens - 1])); + RegionOfInterest.setEnd(CXXUnit->getPreprocessor().getLocForEndOfToken(End, + 1)); + // FIXME: Would be great to have a "hint" cursor, then walk from that + // hint cursor upward until we find a cursor whose source range encloses + // the region of interest, rather than starting from the translation unit. 
+ AnnotateTokensData Annotated; + CXCursor Parent = clang_getTranslationUnitCursor(CXXUnit); + CursorVisitor AnnotateVis(CXXUnit, AnnotateTokensVisitor, &Annotated, + Decl::MaxPCHLevel, RegionOfInterest); + AnnotateVis.VisitChildren(Parent); + + for (unsigned I = 0; I != NumTokens; ++I) { + // Determine whether we saw a cursor at this token's location. + AnnotateTokensData::iterator Pos = Annotated.find(Tokens[I].int_data[1]); + if (Pos == Annotated.end()) + continue; + + Cursors[I] = Pos->second; + } } void clang_disposeTokens(CXTranslationUnit TU, CXToken *Tokens, unsigned NumTokens) { - if (Tokens) - free(Tokens); + free(Tokens); } } // end: extern "C" diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c index 1015aa5e74..243b8736f4 100644 --- a/tools/c-index-test/c-index-test.c +++ b/tools/c-index-test/c-index-test.c @@ -810,8 +810,13 @@ int perform_token_annotation(int argc, const char **argv) { 0, &start_line, &start_column); clang_getInstantiationLocation(clang_getRangeEnd(extent), 0, &end_line, &end_column); - printf("%s: \"%s\" [%d:%d - %d:%d]\n", kind, clang_getCString(spelling), + printf("%s: \"%s\" [%d:%d - %d:%d]", kind, clang_getCString(spelling), start_line, start_column, end_line, end_column); + if (!clang_isInvalid(cursors[i].kind)) { + printf(" "); + PrintCursor(cursors[i]); + } + printf("\n"); } free(cursors);