From cff9cc95de367a3aea885a7f8fee304fe2707b92 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 12 Oct 2008 05:44:03 +0000 Subject: [PATCH] start implementing a token rewriter. At this point, it just reads in a file and lets a client iterate over it. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@57407 91177308-0d34-0410-b5e6-96231b3b80d8 --- Driver/RewriteTest.cpp | 10 ++++- clang.xcodeproj/project.pbxproj | 4 ++ include/clang/Basic/SourceLocation.h | 5 +++ include/clang/Rewrite/TokenRewriter.h | 60 +++++++++++++++++++++++++++ lib/Rewrite/TokenRewriter.cpp | 53 +++++++++++++++++++++++ 5 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 include/clang/Rewrite/TokenRewriter.h create mode 100644 lib/Rewrite/TokenRewriter.cpp diff --git a/Driver/RewriteTest.cpp b/Driver/RewriteTest.cpp index 446ae1a4fb..6709860272 100644 --- a/Driver/RewriteTest.cpp +++ b/Driver/RewriteTest.cpp @@ -21,6 +21,12 @@ void clang::DoRewriteTest(Preprocessor &PP, const std::string &InFileName, SourceManager &SM = PP.getSourceManager(); const LangOptions &LangOpts = PP.getLangOptions(); + TokenRewriter Rewriter(SM.getMainFileID(), SM, LangOpts); + + + + + std::pair File =SM.getBufferData(SM.getMainFileID()); // Create a lexer to lex all the tokens of the main file in raw mode. 
Even @@ -37,5 +43,7 @@ void clang::DoRewriteTest(Preprocessor &PP, const std::string &InFileName, RawLex.LexFromRawLexer(RawTok); } - + for (TokenRewriter::token_iterator I = Rewriter.token_begin(), + E = Rewriter.token_end(); I != E; ++I) + std::cout << PP.getSpelling(*I); } \ No newline at end of file diff --git a/clang.xcodeproj/project.pbxproj b/clang.xcodeproj/project.pbxproj index c6e50213b9..9e7791af5d 100644 --- a/clang.xcodeproj/project.pbxproj +++ b/clang.xcodeproj/project.pbxproj @@ -123,6 +123,7 @@ DE4772FC0C10EAEC002239E8 /* CGExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4772FB0C10EAEC002239E8 /* CGExpr.cpp */; }; DE47999C0D2EBE1A00706D2D /* SemaExprObjC.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE47999B0D2EBE1A00706D2D /* SemaExprObjC.cpp */; }; DE4DC79E0EA1C09E00069E5A /* RewriteTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4DC79D0EA1C09E00069E5A /* RewriteTest.cpp */; }; + DE4DC7A30EA1C33E00069E5A /* TokenRewriter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4DC7A20EA1C33E00069E5A /* TokenRewriter.cpp */; }; DE5932D10AD60FF400BC794C /* clang.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE5932CD0AD60FF400BC794C /* clang.cpp */; }; DE5932D20AD60FF400BC794C /* clang.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE5932CE0AD60FF400BC794C /* clang.h */; }; DE5932D30AD60FF400BC794C /* PrintParserCallbacks.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE5932CF0AD60FF400BC794C /* PrintParserCallbacks.cpp */; }; @@ -446,6 +447,7 @@ DE47999B0D2EBE1A00706D2D /* SemaExprObjC.cpp */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.cpp.cpp; name = SemaExprObjC.cpp; path = lib/Sema/SemaExprObjC.cpp; sourceTree = ""; tabWidth = 2; }; DE4DC7980EA1BE4400069E5A /* TokenRewriter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TokenRewriter.h; path = clang/Rewrite/TokenRewriter.h; sourceTree = ""; }; DE4DC79D0EA1C09E00069E5A 
/* RewriteTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = RewriteTest.cpp; path = Driver/RewriteTest.cpp; sourceTree = ""; }; + DE4DC7A20EA1C33E00069E5A /* TokenRewriter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TokenRewriter.cpp; path = lib/Rewrite/TokenRewriter.cpp; sourceTree = ""; }; DE53370B0CE2D96F00D9A028 /* RewriteRope.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RewriteRope.h; path = clang/Rewrite/RewriteRope.h; sourceTree = ""; }; DE5932CD0AD60FF400BC794C /* clang.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = clang.cpp; path = Driver/clang.cpp; sourceTree = ""; }; DE5932CE0AD60FF400BC794C /* clang.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = clang.h; path = Driver/clang.h; sourceTree = ""; }; @@ -1073,6 +1075,7 @@ 72D16C1E0D9975C400E6DA4A /* HTMLRewrite.cpp */, DEF7D9F80C9C8B1D0001F598 /* Rewriter.cpp */, DECAB0CF0DB3C84200E13CCB /* RewriteRope.cpp */, + DE4DC7A20EA1C33E00069E5A /* TokenRewriter.cpp */, ); name = Rewrite; sourceTree = ""; @@ -1259,6 +1262,7 @@ 3551068C0E9A8546006A4E44 /* ParsePragma.cpp in Sources */, 3551068D0E9A8546006A4E44 /* ParseTentative.cpp in Sources */, DE4DC79E0EA1C09E00069E5A /* RewriteTest.cpp in Sources */, + DE4DC7A30EA1C33E00069E5A /* TokenRewriter.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/include/clang/Basic/SourceLocation.h b/include/clang/Basic/SourceLocation.h index ee6cc145fe..2e96f133b4 100644 --- a/include/clang/Basic/SourceLocation.h +++ b/include/clang/Basic/SourceLocation.h @@ -155,6 +155,11 @@ public: /// directly. 
unsigned getRawEncoding() const { return ID; }
 
+
+  bool operator<(const SourceLocation &RHS) const {
+    return ID < RHS.ID;
+  }
+
   /// getFromRawEncoding - Turn a raw encoding of a SourceLocation object into
   /// a real SourceLocation.
   static SourceLocation getFromRawEncoding(unsigned Encoding) {
diff --git a/include/clang/Rewrite/TokenRewriter.h b/include/clang/Rewrite/TokenRewriter.h
new file mode 100644
index 0000000000..da0141ff34
--- /dev/null
+++ b/include/clang/Rewrite/TokenRewriter.h
@@ -0,0 +1,60 @@
+//===--- TokenRewriter.h - Token-based Rewriter -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the TokenRewriter class, which is used for code
+//  transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOKENREWRITER_H
+#define LLVM_CLANG_TOKENREWRITER_H
+
+#include "clang/Basic/SourceLocation.h"
+#include <list>
+#include <map>
+
+namespace clang {
+  class Token;
+  class LangOptions;
+
+  class TokenRewriter {
+    /// TokenList - This is the list of raw tokens that make up this file.  Each
+    /// of these tokens has a unique SourceLocation, which is a FileID.
+    std::list<Token> TokenList;
+
+    /// TokenRefTy - This is the type used to refer to a token in the TokenList.
+    typedef std::list<Token>::iterator TokenRefTy;
+
+    /// TokenAtLoc - This map indicates which token exists at a specific
+    /// SourceLocation.  Since each token has a unique SourceLocation, this is a
+    /// one to one map.  The token can return its own location directly, to map
+    /// backwards.
+    std::map<SourceLocation, TokenRefTy> TokenAtLoc;
+
+  public:
+    /// TokenRewriter - This creates a TokenRewriter for the file with the
+    /// specified FileID.
+    TokenRewriter(unsigned FileID, SourceManager &SM, const LangOptions &LO);
+
+
+    typedef std::list<Token>::const_iterator token_iterator;
+    token_iterator token_begin() const { return TokenList.begin(); }
+    token_iterator token_end() const { return TokenList.end(); }
+
+  private:
+    /// AddToken - Insert the specified token into the Rewriter before the
+    /// position Where, and record its location in TokenAtLoc.
+    void AddToken(const Token &T, TokenRefTy Where);
+  };
+
+
+
+} // end namespace clang
+
+#endif
diff --git a/lib/Rewrite/TokenRewriter.cpp b/lib/Rewrite/TokenRewriter.cpp
new file mode 100644
index 0000000000..0362bacdfc
--- /dev/null
+++ b/lib/Rewrite/TokenRewriter.cpp
@@ -0,0 +1,53 @@
+//===--- TokenRewriter.cpp - Token-based code rewriting interface ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the TokenRewriter class, which is used for code
+//  transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Rewrite/TokenRewriter.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Basic/SourceManager.h"
+#include <cassert>
+using namespace clang;
+
+TokenRewriter::TokenRewriter(unsigned FileID, SourceManager &SM,
+                             const LangOptions &LangOpts) {
+
+  std::pair<const char*,const char*> File = SM.getBufferData(FileID);
+
+  // Create a lexer to lex all the tokens of the specified file in raw mode.
+  Lexer RawLex(SourceLocation::getFileLoc(FileID, 0),
+               LangOpts, File.first, File.second);
+
+  // Return all comments and whitespace as tokens.
+  RawLex.SetKeepWhitespaceMode(true);
+
+  // Lex the file up to EOF, appending each raw token to our data structures.
+  Token RawTok;
+  RawLex.LexFromRawLexer(RawTok);
+  while (RawTok.isNot(tok::eof)) {
+    AddToken(RawTok, TokenList.end());
+    RawLex.LexFromRawLexer(RawTok);
+  }
+
+}
+
+/// AddToken - Insert the specified token into the Rewriter before the position
+/// Where, and record its location in TokenAtLoc.
+void TokenRewriter::AddToken(const Token &T, TokenRefTy Where) {
+  Where = TokenList.insert(Where, T);
+
+  bool InsertSuccess = TokenAtLoc.insert(std::make_pair(T.getLocation(),
+                                                        Where)).second;
+  assert(InsertSuccess && "Token location already in rewriter!");
+  (void)InsertSuccess;  // Only read by the assert above.
+}
+
-- 
2.40.0